1 /* $NetBSD: ip_match.c,v 1.1.1.2 2013/01/02 18:59:13 tron Exp $ */
2
3 /*++
4 /* NAME
5 /* ip_match 3
6 /* SUMMARY
7 /* IP address pattern matching
8 /* SYNOPSIS
9 /* #include <ip_match.h>
10 /*
11 /* char *ip_match_parse(byte_codes, pattern)
12 /* VSTRING *byte_codes;
13 /* char *pattern;
14 /*
15 /* char *ip_match_save(byte_codes)
16 /* const VSTRING *byte_codes;
17 /*
18 /* int ip_match_execute(byte_codes, addr_bytes)
19 /* cost char *byte_codes;
20 /* const char *addr_bytes;
21 /*
22 /* char *ip_match_dump(printable, byte_codes)
23 /* VSTRING *printable;
24 /* const char *byte_codes;
25 /* DESCRIPTION
26 /* This module supports IP address pattern matching. See below
27 /* for a description of the supported address pattern syntax.
28 /*
29 /* This implementation aims to minimize the cost of encoding
30 /* the pattern in internal form, while still providing good
31 /* matching performance in the typical case. The first byte
32 /* of an encoded pattern specifies the expected address family
33 /* (for example, AF_INET); other details of the encoding are
34 /* private and are subject to change.
35 /*
36 /* ip_match_parse() converts the user-specified pattern to
37 /* internal form. The result value is a null pointer in case
38 /* of success, or a pointer into the byte_codes buffer with a
39 /* detailed problem description.
40 /*
41 /* ip_match_save() saves the result from ip_match_parse() for
42 /* longer-term usage. The result should be passed to myfree().
43 /*
44 /* ip_match_execute() matches a binary network in addr_bytes
45 /* against a byte-code array in byte_codes. It is an error to
46 /* use different address families for the byte_codes and addr_bytes
47 /* arguments (the first byte-code value contains the expected
48 /* address family). The result is non-zero in case of success.
49 /*
50 /* ip_match_dump() produces an ASCII dump of a byte-code array.
51 /* The dump is supposed to be identical to the input pattern
52 /* modulo upper/lower case or leading nulls with IPv6). This
53 /* function is primarily a debugging aid.
54 /*
55 /* Arguments
56 /* .IP addr_bytes
57 /* Binary network address in network-byte order.
58 /* .IP byte_codes
59 /* Byte-code array produced by ip_match_parse().
60 /* .IP pattern
61 /* Human-readable address pattern.
62 /* .IP printable
63 /* storage for ASCII dump of a byte-code array.
64 /* IPV4 PATTERN SYNTAX
65 /* .ad
66 /* .fi
67 /* An IPv4 address pattern has four fields separated by ".".
68 /* Each field is either a decimal number, or a sequence inside
69 /* "[]" that contains one or more ";"-separated decimal
70 /* numbers or number..number ranges.
71 /*
72 /* Examples of patterns are 1.2.3.4 (matches itself, as one
73 /* would expect) and 1.2.3.[2,4,6..8] (matches 1.2.3.2, 1.2.3.4,
74 /* 1.2.3.6, 1.2.3.7, 1.2.3.8).
75 /*
76 /* Thus, any pattern field can be a sequence inside "[]", but
77 /* a "[]" sequence cannot span multiple address fields, and
78 /* a pattern field cannot contain both a number and a "[]"
79 /* sequence at the same time.
80 /*
81 /* This means that the pattern 1.2.[3.4] is not valid (the
82 /* sequence [3.4] cannot span two address fields) and the
83 /* pattern 1.2.3.3[6..9] is also not valid (the last field
84 /* cannot be both number 3 and sequence [6..9] at the same
85 /* time).
86 /*
87 /* The syntax for IPv4 patterns is as follows:
88 /*
89 /* .in +5
90 /* v4pattern = v4field "." v4field "." v4field "." v4field
91 /* .br
92 /* v4field = v4octet | "[" v4sequence "]"
93 /* .br
94 /* v4octet = any decimal number in the range 0 through 255
95 /* .br
96 /* v4sequence = v4seq_member | v4sequence ";" v4seq_member
97 /* .br
98 /* v4seq_member = v4octet | v4octet ".." v4octet
99 /* .in
100 /* LICENSE
101 /* .ad
102 /* .fi
103 /* The Secure Mailer license must be distributed with this
104 /* software.
105 /* AUTHOR(S)
106 /* Wietse Venema
107 /* IBM T.J. Watson Research
108 /* P.O. Box 704
109 /* Yorktown Heights, NY 10598, USA
110 /*--*/
111
112 /* System library. */
113
114 #include <sys_defs.h>
115 #include <sys/socket.h>
116 #include <ctype.h>
117 #include <string.h>
118
119 /* Utility library. */
120
121 #include <msg.h>
122 #include <mymalloc.h>
123 #include <vstring.h>
124 #include <ip_match.h>
125
126 /*
127 * Token values. The in-band values are also used as byte-code values.
128 */
129 #define IP_MATCH_CODE_OPEN '[' /* in-band */
130 #define IP_MATCH_CODE_CLOSE ']' /* in-band */
131 #define IP_MATCH_CODE_OVAL 'N' /* in-band */
132 #define IP_MATCH_CODE_RANGE 'R' /* in-band */
133 #define IP_MATCH_CODE_EOF '\0' /* in-band */
134 #define IP_MATCH_CODE_ERR 256 /* out-of-band */
135
136 /*
137 * SLMs.
138 */
139 #define STR vstring_str
140 #define LEN VSTRING_LEN
141
142 /* ip_match_save - make longer-term copy of byte code */
143
ip_match_save(const VSTRING * byte_codes)144 char *ip_match_save(const VSTRING *byte_codes)
145 {
146 char *dst;
147
148 dst = mymalloc(LEN(byte_codes));
149 return (memcpy(dst, STR(byte_codes), LEN(byte_codes)));
150 }
151
152 /* ip_match_dump - byte-code pretty printer */
153
ip_match_dump(VSTRING * printable,const char * byte_codes)154 char *ip_match_dump(VSTRING *printable, const char *byte_codes)
155 {
156 const char *myname = "ip_match_dump";
157 const unsigned char *bp;
158 int octet_count = 0;
159 int ch;
160
161 /*
162 * Sanity check. Use different dumping loops for AF_INET and AF_INET6.
163 */
164 if (*byte_codes != AF_INET)
165 msg_panic("%s: malformed byte-code header", myname);
166
167 /*
168 * Pretty-print and sanity-check the byte codes. Note: the loops in this
169 * code have no auto-increment at the end of the iteration. Instead, each
170 * byte-code handler bumps the byte-code pointer appropriately.
171 */
172 VSTRING_RESET(printable);
173 bp = (const unsigned char *) byte_codes + 1;
174 for (;;) {
175
176 /*
177 * Simple numeric field.
178 */
179 if ((ch = *bp++) == IP_MATCH_CODE_OVAL) {
180 vstring_sprintf_append(printable, "%d", *bp);
181 bp += 1;
182 }
183
184 /*
185 * Wild-card numeric field.
186 */
187 else if (ch == IP_MATCH_CODE_OPEN) {
188 vstring_sprintf_append(printable, "[");
189 for (;;) {
190 /* Numeric range. */
191 if ((ch = *bp++) == IP_MATCH_CODE_RANGE) {
192 vstring_sprintf_append(printable, "%d..%d", bp[0], bp[1]);
193 bp += 2;
194 }
195 /* Number. */
196 else if (ch == IP_MATCH_CODE_OVAL) {
197 vstring_sprintf_append(printable, "%d", *bp);
198 bp += 1;
199 }
200 /* End-of-wildcard. */
201 else if (ch == IP_MATCH_CODE_CLOSE) {
202 break;
203 }
204 /* Corruption. */
205 else {
206 msg_panic("%s: unexpected byte code (decimal %d) "
207 "after \"%s\"", myname, ch, STR(printable));
208 }
209 /* Output the wild-card field separator and repeat the loop. */
210 if (*bp != IP_MATCH_CODE_CLOSE)
211 vstring_sprintf_append(printable, ";");
212 }
213 vstring_sprintf_append(printable, "]");
214 }
215
216 /*
217 * Corruption.
218 */
219 else {
220 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
221 myname, ch, STR(printable));
222 }
223
224 /*
225 * Require four octets, not one more, not one less.
226 */
227 if (++octet_count == 4) {
228 if (*bp != 0)
229 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
230 myname, ch, STR(printable));
231 return (STR(printable));
232 }
233 if (*bp == 0)
234 msg_panic("%s: truncated byte code after \"%s\"",
235 myname, STR(printable));
236
237 /*
238 * Output the address field separator and repeat the loop.
239 */
240 vstring_sprintf_append(printable, ".");
241 }
242 }
243
244 /* ip_match_print_code_prefix - printable byte-code prefix */
245
ip_match_print_code_prefix(const char * byte_codes,size_t len)246 static char *ip_match_print_code_prefix(const char *byte_codes, size_t len)
247 {
248 static VSTRING *printable = 0;
249 const char *fmt;
250 const char *bp;
251
252 /*
253 * This is primarily for emergency debugging so we don't care about
254 * non-reentrancy.
255 */
256 if (printable == 0)
257 printable = vstring_alloc(100);
258 else
259 VSTRING_RESET(printable);
260
261 /*
262 * Use decimal for IPv4 and hexadecimal otherwise, so that address octet
263 * values are easy to recognize.
264 */
265 fmt = (*byte_codes == AF_INET ? "%d " : "%02x ");
266 for (bp = byte_codes; bp < byte_codes + len; bp++)
267 vstring_sprintf_append(printable, fmt, *(const unsigned char *) bp);
268
269 return (STR(printable));
270 }
271
272 /* ip_match_execute - byte-code matching engine */
273
ip_match_execute(const char * byte_codes,const char * addr_bytes)274 int ip_match_execute(const char *byte_codes, const char *addr_bytes)
275 {
276 const char *myname = "ip_match_execute";
277 const unsigned char *bp;
278 const unsigned char *ap;
279 int octet_count = 0;
280 int ch;
281 int matched;
282
283 /*
284 * Sanity check. Use different execute loops for AF_INET and AF_INET6.
285 */
286 if (*byte_codes != AF_INET)
287 msg_panic("%s: malformed byte-code header (decimal %d)",
288 myname, *(const unsigned char *) byte_codes);
289
290 /*
291 * Match the address bytes against the byte codes. Avoid problems with
292 * (char -> int) sign extension on architectures with signed characters.
293 */
294 bp = (const unsigned char *) byte_codes + 1;
295 ap = (const unsigned char *) addr_bytes;
296
297 for (octet_count = 0; octet_count < 4; octet_count++, ap++) {
298
299 /*
300 * Simple numeric field.
301 */
302 if ((ch = *bp++) == IP_MATCH_CODE_OVAL) {
303 if (*ap == *bp)
304 bp += 1;
305 else
306 return (0);
307 }
308
309 /*
310 * Wild-card numeric field.
311 */
312 else if (ch == IP_MATCH_CODE_OPEN) {
313 matched = 0;
314 for (;;) {
315 /* Numeric range. */
316 if ((ch = *bp++) == IP_MATCH_CODE_RANGE) {
317 if (!matched)
318 matched = (*ap >= bp[0] && *ap <= bp[1]);
319 bp += 2;
320 }
321 /* Number. */
322 else if (ch == IP_MATCH_CODE_OVAL) {
323 if (!matched)
324 matched = (*ap == *bp);
325 bp += 1;
326 }
327 /* End-of-wildcard. */
328 else if (ch == IP_MATCH_CODE_CLOSE) {
329 break;
330 }
331 /* Corruption. */
332 else {
333 size_t len = (const char *) bp - byte_codes - 1;
334
335 msg_panic("%s: unexpected byte code (decimal %d) "
336 "after \"%s\"", myname, ch,
337 ip_match_print_code_prefix(byte_codes, len));
338 }
339 }
340 if (matched == 0)
341 return (0);
342 }
343
344 /*
345 * Corruption.
346 */
347 else {
348 size_t len = (const char *) bp - byte_codes - 1;
349
350 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"",
351 myname, ch, ip_match_print_code_prefix(byte_codes, len));
352 }
353 }
354 return (1);
355 }
356
357 /* ip_match_next_token - carve out the next token from input pattern */
358
ip_match_next_token(char ** pstart,char ** psaved_start,int * poval)359 static int ip_match_next_token(char **pstart, char **psaved_start, int *poval)
360 {
361 unsigned char *cp;
362 int oval; /* octet value */
363 int type; /* token value */
364
365 /*
366 * Return a literal, error, or EOF token. Update the read pointer to the
367 * start of the next token or leave it at the string terminator.
368 */
369 #define IP_MATCH_RETURN_TOK(next, type) \
370 do { *pstart = (char *) (next); return (type); } while (0)
371
372 /*
373 * Return a token that contains an IPv4 address octet value.
374 */
375 #define IP_MATCH_RETURN_TOK_VAL(next, type, oval) do { \
376 *poval = (oval); IP_MATCH_RETURN_TOK((next), type); \
377 } while (0)
378
379 /*
380 * Light-weight tokenizer. Each result is an IPv4 address octet value, a
381 * literal character value, error, or EOF.
382 */
383 *psaved_start = *pstart;
384 cp = (unsigned char *) *pstart;
385 if (ISDIGIT(*cp)) {
386 oval = *cp - '0';
387 type = IP_MATCH_CODE_OVAL;
388 for (cp += 1; ISDIGIT(*cp); cp++) {
389 oval *= 10;
390 oval += *cp - '0';
391 if (oval > 255)
392 type = IP_MATCH_CODE_ERR;
393 }
394 IP_MATCH_RETURN_TOK_VAL(cp, type, oval);
395 } else {
396 IP_MATCH_RETURN_TOK(*cp ? cp + 1 : cp, *cp);
397 }
398 }
399
400 /* ipmatch_print_parse_error - formatted parsing error, with context */
401
ipmatch_print_parse_error(VSTRING * reply,char * start,char * here,char * next,const char * fmt,...)402 static void PRINTFLIKE(5, 6) ipmatch_print_parse_error(VSTRING *reply,
403 char *start,
404 char *here,
405 char *next,
406 const char *fmt,...)
407 {
408 va_list ap;
409 int start_width;
410 int here_width;
411
412 /*
413 * Format the error type.
414 */
415 va_start(ap, fmt);
416 vstring_vsprintf(reply, fmt, ap);
417 va_end(ap);
418
419 /*
420 * Format the error context. The syntax is complex enough that it is
421 * worth the effort to precisely indicate what input is in error.
422 *
423 * XXX Workaround for %.*s to avoid output when a zero width is specified.
424 */
425 if (start != 0) {
426 start_width = here - start;
427 here_width = next - here;
428 vstring_sprintf_append(reply, " at \"%.*s>%.*s<%s\"",
429 start_width, start_width == 0 ? "" : start,
430 here_width, here_width == 0 ? "" : here, next);
431 }
432 }
433
434 /* ip_match_parse - parse an entire wild-card address pattern */
435
ip_match_parse(VSTRING * byte_codes,char * pattern)436 char *ip_match_parse(VSTRING *byte_codes, char *pattern)
437 {
438 int octet_count;
439 char *saved_cp;
440 char *cp;
441 int token_type;
442 int look_ahead;
443 int oval;
444 int saved_oval;
445
446 /*
447 * Simplify this if we change to {} for wildcard notation.
448 */
449 #define FIND_TERMINATOR(start, cp) do { \
450 int _level = 0; \
451 for (cp = (start) ; *cp; cp++) { \
452 if (*cp == '[') _level++; \
453 if (*cp != ']') continue; \
454 if (--_level == 0) break; \
455 } \
456 } while (0)
457
458 /*
459 * Strip [] around the entire pattern.
460 */
461 if (*pattern == '[') {
462 FIND_TERMINATOR(pattern, cp);
463 if (cp[0] == 0) {
464 vstring_sprintf(byte_codes, "missing \"]\" character");
465 return (STR(byte_codes));
466 }
467 if (cp[1] == 0) {
468 *cp = 0;
469 pattern += 1;
470 }
471 }
472
473 /*
474 * Sanity check. In this case we can't show any error context.
475 */
476 if (*pattern == 0) {
477 vstring_sprintf(byte_codes, "empty address pattern");
478 return (STR(byte_codes));
479 }
480
481 /*
482 * Simple parser with on-the-fly encoding. For now, IPv4 support only.
483 * Use different parser loops for IPv4 and IPv6.
484 */
485 VSTRING_RESET(byte_codes);
486 VSTRING_ADDCH(byte_codes, AF_INET);
487 octet_count = 0;
488 cp = pattern;
489
490 /*
491 * Require four address fields separated by ".", each field containing a
492 * numeric octet value or a sequence inside []. The loop head has no test
493 * and does not step the loop variable. The tokenizer advances the loop
494 * variable, and the loop termination logic is inside the loop.
495 */
496 for (;;) {
497 switch (token_type = ip_match_next_token(&cp, &saved_cp, &oval)) {
498
499 /*
500 * Numeric address field.
501 */
502 case IP_MATCH_CODE_OVAL:
503 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL);
504 VSTRING_ADDCH(byte_codes, oval);
505 break;
506
507 /*
508 * Wild-card address field.
509 */
510 case IP_MATCH_CODE_OPEN:
511 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OPEN);
512 /* Require ";"-separated numbers or numeric ranges. */
513 for (;;) {
514 token_type = ip_match_next_token(&cp, &saved_cp, &oval);
515 if (token_type == IP_MATCH_CODE_OVAL) {
516 saved_oval = oval;
517 look_ahead = ip_match_next_token(&cp, &saved_cp, &oval);
518 /* Numeric range. */
519 if (look_ahead == '.') {
520 /* Brute-force parsing. */
521 if (ip_match_next_token(&cp, &saved_cp, &oval) == '.'
522 && ip_match_next_token(&cp, &saved_cp, &oval)
523 == IP_MATCH_CODE_OVAL
524 && saved_oval <= oval) {
525 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_RANGE);
526 VSTRING_ADDCH(byte_codes, saved_oval);
527 VSTRING_ADDCH(byte_codes, oval);
528 look_ahead =
529 ip_match_next_token(&cp, &saved_cp, &oval);
530 } else {
531 ipmatch_print_parse_error(byte_codes, pattern,
532 saved_cp, cp,
533 "numeric range error");
534 return (STR(byte_codes));
535 }
536 }
537 /* Single number. */
538 else {
539 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL);
540 VSTRING_ADDCH(byte_codes, saved_oval);
541 }
542 /* Require ";" or end-of-wildcard. */
543 token_type = look_ahead;
544 if (token_type == ';') {
545 continue;
546 } else if (token_type == IP_MATCH_CODE_CLOSE) {
547 break;
548 } else {
549 ipmatch_print_parse_error(byte_codes, pattern,
550 saved_cp, cp,
551 "need \";\" or \"%c\"",
552 IP_MATCH_CODE_CLOSE);
553 return (STR(byte_codes));
554 }
555 } else {
556 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
557 "need decimal number 0..255");
558 return (STR(byte_codes));
559 }
560 }
561 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_CLOSE);
562 break;
563
564 /*
565 * Invalid field.
566 */
567 default:
568 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
569 "need decimal number 0..255 or \"%c\"",
570 IP_MATCH_CODE_OPEN);
571 return (STR(byte_codes));
572 }
573 octet_count += 1;
574
575 /*
576 * Require four address fields. Not one more, not one less.
577 */
578 if (octet_count == 4) {
579 if (*cp != 0) {
580 (void) ip_match_next_token(&cp, &saved_cp, &oval);
581 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
582 "garbage after pattern");
583 return (STR(byte_codes));
584 }
585 VSTRING_ADDCH(byte_codes, 0);
586 return (0);
587 }
588
589 /*
590 * Require "." before the next address field.
591 */
592 if (ip_match_next_token(&cp, &saved_cp, &oval) != '.') {
593 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp,
594 "need \".\"");
595 return (STR(byte_codes));
596 }
597 }
598 }
599
600 #ifdef TEST
601
602 /*
603 * Dummy main program for regression tests.
604 */
605 #include <sys/socket.h>
606 #include <netinet/in.h>
607 #include <arpa/inet.h>
608 #include <stdlib.h>
609 #include <unistd.h>
610 #include <vstream.h>
611 #include <vstring_vstream.h>
612 #include <stringops.h>
613
main(int argc,char ** argv)614 int main(int argc, char **argv)
615 {
616 VSTRING *byte_codes = vstring_alloc(100);
617 VSTRING *line_buf = vstring_alloc(100);
618 char *bufp;
619 char *err;
620 char *user_pattern;
621 char *user_address;
622 int echo_input = !isatty(0);
623
624 /*
625 * Iterate over the input stream. The input format is a pattern, followed
626 * by optional addresses to match against.
627 */
628 while (vstring_fgets_nonl(line_buf, VSTREAM_IN)) {
629 bufp = STR(line_buf);
630 if (echo_input) {
631 vstream_printf("> %s\n", bufp);
632 vstream_fflush(VSTREAM_OUT);
633 }
634 if (*bufp == '#')
635 continue;
636 if ((user_pattern = mystrtok(&bufp, " \t")) == 0)
637 continue;
638
639 /*
640 * Parse and dump the pattern.
641 */
642 if ((err = ip_match_parse(byte_codes, user_pattern)) != 0) {
643 vstream_printf("Error: %s\n", err);
644 } else {
645 vstream_printf("Code: %s\n",
646 ip_match_dump(line_buf, STR(byte_codes)));
647 }
648 vstream_fflush(VSTREAM_OUT);
649
650 /*
651 * Match the optional patterns.
652 */
653 while ((user_address = mystrtok(&bufp, " \t")) != 0) {
654 struct in_addr netw_addr;
655
656 switch (inet_pton(AF_INET, user_address, &netw_addr)) {
657 case 1:
658 vstream_printf("Match %s: %s\n", user_address,
659 ip_match_execute(STR(byte_codes),
660 (char *) &netw_addr.s_addr) ?
661 "yes" : "no");
662 break;
663 case 0:
664 vstream_printf("bad address syntax: %s\n", user_address);
665 break;
666 case -1:
667 vstream_printf("%s: %m\n", user_address);
668 break;
669 }
670 vstream_fflush(VSTREAM_OUT);
671 }
672 }
673 vstring_free(line_buf);
674 vstring_free(byte_codes);
675 exit(0);
676 }
677
678 #endif
679