1 /* $NetBSD: ip_match.c,v 1.1.1.2 2013/01/02 18:59:13 tron Exp $ */ 2 3 /*++ 4 /* NAME 5 /* ip_match 3 6 /* SUMMARY 7 /* IP address pattern matching 8 /* SYNOPSIS 9 /* #include <ip_match.h> 10 /* 11 /* char *ip_match_parse(byte_codes, pattern) 12 /* VSTRING *byte_codes; 13 /* char *pattern; 14 /* 15 /* char *ip_match_save(byte_codes) 16 /* const VSTRING *byte_codes; 17 /* 18 /* int ip_match_execute(byte_codes, addr_bytes) 19 /* cost char *byte_codes; 20 /* const char *addr_bytes; 21 /* 22 /* char *ip_match_dump(printable, byte_codes) 23 /* VSTRING *printable; 24 /* const char *byte_codes; 25 /* DESCRIPTION 26 /* This module supports IP address pattern matching. See below 27 /* for a description of the supported address pattern syntax. 28 /* 29 /* This implementation aims to minimize the cost of encoding 30 /* the pattern in internal form, while still providing good 31 /* matching performance in the typical case. The first byte 32 /* of an encoded pattern specifies the expected address family 33 /* (for example, AF_INET); other details of the encoding are 34 /* private and are subject to change. 35 /* 36 /* ip_match_parse() converts the user-specified pattern to 37 /* internal form. The result value is a null pointer in case 38 /* of success, or a pointer into the byte_codes buffer with a 39 /* detailed problem description. 40 /* 41 /* ip_match_save() saves the result from ip_match_parse() for 42 /* longer-term usage. The result should be passed to myfree(). 43 /* 44 /* ip_match_execute() matches a binary network in addr_bytes 45 /* against a byte-code array in byte_codes. It is an error to 46 /* use different address families for the byte_codes and addr_bytes 47 /* arguments (the first byte-code value contains the expected 48 /* address family). The result is non-zero in case of success. 49 /* 50 /* ip_match_dump() produces an ASCII dump of a byte-code array. 51 /* The dump is supposed to be identical to the input pattern 52 /* modulo upper/lower case or leading nulls with IPv6). This 53 /* function is primarily a debugging aid. 54 /* 55 /* Arguments 56 /* .IP addr_bytes 57 /* Binary network address in network-byte order. 58 /* .IP byte_codes 59 /* Byte-code array produced by ip_match_parse(). 60 /* .IP pattern 61 /* Human-readable address pattern. 62 /* .IP printable 63 /* storage for ASCII dump of a byte-code array. 64 /* IPV4 PATTERN SYNTAX 65 /* .ad 66 /* .fi 67 /* An IPv4 address pattern has four fields separated by ".". 68 /* Each field is either a decimal number, or a sequence inside 69 /* "[]" that contains one or more ";"-separated decimal 70 /* numbers or number..number ranges. 71 /* 72 /* Examples of patterns are 1.2.3.4 (matches itself, as one 73 /* would expect) and 1.2.3.[2,4,6..8] (matches 1.2.3.2, 1.2.3.4, 74 /* 1.2.3.6, 1.2.3.7, 1.2.3.8). 75 /* 76 /* Thus, any pattern field can be a sequence inside "[]", but 77 /* a "[]" sequence cannot span multiple address fields, and 78 /* a pattern field cannot contain both a number and a "[]" 79 /* sequence at the same time. 80 /* 81 /* This means that the pattern 1.2.[3.4] is not valid (the 82 /* sequence [3.4] cannot span two address fields) and the 83 /* pattern 1.2.3.3[6..9] is also not valid (the last field 84 /* cannot be both number 3 and sequence [6..9] at the same 85 /* time). 86 /* 87 /* The syntax for IPv4 patterns is as follows: 88 /* 89 /* .in +5 90 /* v4pattern = v4field "." v4field "." v4field "." v4field 91 /* .br 92 /* v4field = v4octet | "[" v4sequence "]" 93 /* .br 94 /* v4octet = any decimal number in the range 0 through 255 95 /* .br 96 /* v4sequence = v4seq_member | v4sequence ";" v4seq_member 97 /* .br 98 /* v4seq_member = v4octet | v4octet ".." v4octet 99 /* .in 100 /* LICENSE 101 /* .ad 102 /* .fi 103 /* The Secure Mailer license must be distributed with this 104 /* software. 105 /* AUTHOR(S) 106 /* Wietse Venema 107 /* IBM T.J. Watson Research 108 /* P.O. Box 704 109 /* Yorktown Heights, NY 10598, USA 110 /*--*/ 111 112 /* System library. */ 113 114 #include <sys_defs.h> 115 #include <sys/socket.h> 116 #include <ctype.h> 117 #include <string.h> 118 119 /* Utility library. */ 120 121 #include <msg.h> 122 #include <mymalloc.h> 123 #include <vstring.h> 124 #include <ip_match.h> 125 126 /* 127 * Token values. The in-band values are also used as byte-code values. 128 */ 129 #define IP_MATCH_CODE_OPEN '[' /* in-band */ 130 #define IP_MATCH_CODE_CLOSE ']' /* in-band */ 131 #define IP_MATCH_CODE_OVAL 'N' /* in-band */ 132 #define IP_MATCH_CODE_RANGE 'R' /* in-band */ 133 #define IP_MATCH_CODE_EOF '\0' /* in-band */ 134 #define IP_MATCH_CODE_ERR 256 /* out-of-band */ 135 136 /* 137 * SLMs. 138 */ 139 #define STR vstring_str 140 #define LEN VSTRING_LEN 141 142 /* ip_match_save - make longer-term copy of byte code */ 143 144 char *ip_match_save(const VSTRING *byte_codes) 145 { 146 char *dst; 147 148 dst = mymalloc(LEN(byte_codes)); 149 return (memcpy(dst, STR(byte_codes), LEN(byte_codes))); 150 } 151 152 /* ip_match_dump - byte-code pretty printer */ 153 154 char *ip_match_dump(VSTRING *printable, const char *byte_codes) 155 { 156 const char *myname = "ip_match_dump"; 157 const unsigned char *bp; 158 int octet_count = 0; 159 int ch; 160 161 /* 162 * Sanity check. Use different dumping loops for AF_INET and AF_INET6. 163 */ 164 if (*byte_codes != AF_INET) 165 msg_panic("%s: malformed byte-code header", myname); 166 167 /* 168 * Pretty-print and sanity-check the byte codes. Note: the loops in this 169 * code have no auto-increment at the end of the iteration. Instead, each 170 * byte-code handler bumps the byte-code pointer appropriately. 171 */ 172 VSTRING_RESET(printable); 173 bp = (const unsigned char *) byte_codes + 1; 174 for (;;) { 175 176 /* 177 * Simple numeric field. 178 */ 179 if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { 180 vstring_sprintf_append(printable, "%d", *bp); 181 bp += 1; 182 } 183 184 /* 185 * Wild-card numeric field. 186 */ 187 else if (ch == IP_MATCH_CODE_OPEN) { 188 vstring_sprintf_append(printable, "["); 189 for (;;) { 190 /* Numeric range. */ 191 if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { 192 vstring_sprintf_append(printable, "%d..%d", bp[0], bp[1]); 193 bp += 2; 194 } 195 /* Number. */ 196 else if (ch == IP_MATCH_CODE_OVAL) { 197 vstring_sprintf_append(printable, "%d", *bp); 198 bp += 1; 199 } 200 /* End-of-wildcard. */ 201 else if (ch == IP_MATCH_CODE_CLOSE) { 202 break; 203 } 204 /* Corruption. */ 205 else { 206 msg_panic("%s: unexpected byte code (decimal %d) " 207 "after \"%s\"", myname, ch, STR(printable)); 208 } 209 /* Output the wild-card field separator and repeat the loop. */ 210 if (*bp != IP_MATCH_CODE_CLOSE) 211 vstring_sprintf_append(printable, ";"); 212 } 213 vstring_sprintf_append(printable, "]"); 214 } 215 216 /* 217 * Corruption. 218 */ 219 else { 220 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", 221 myname, ch, STR(printable)); 222 } 223 224 /* 225 * Require four octets, not one more, not one less. 226 */ 227 if (++octet_count == 4) { 228 if (*bp != 0) 229 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", 230 myname, ch, STR(printable)); 231 return (STR(printable)); 232 } 233 if (*bp == 0) 234 msg_panic("%s: truncated byte code after \"%s\"", 235 myname, STR(printable)); 236 237 /* 238 * Output the address field separator and repeat the loop. 239 */ 240 vstring_sprintf_append(printable, "."); 241 } 242 } 243 244 /* ip_match_print_code_prefix - printable byte-code prefix */ 245 246 static char *ip_match_print_code_prefix(const char *byte_codes, size_t len) 247 { 248 static VSTRING *printable = 0; 249 const char *fmt; 250 const char *bp; 251 252 /* 253 * This is primarily for emergency debugging so we don't care about 254 * non-reentrancy. 255 */ 256 if (printable == 0) 257 printable = vstring_alloc(100); 258 else 259 VSTRING_RESET(printable); 260 261 /* 262 * Use decimal for IPv4 and hexadecimal otherwise, so that address octet 263 * values are easy to recognize. 264 */ 265 fmt = (*byte_codes == AF_INET ? "%d " : "%02x "); 266 for (bp = byte_codes; bp < byte_codes + len; bp++) 267 vstring_sprintf_append(printable, fmt, *(const unsigned char *) bp); 268 269 return (STR(printable)); 270 } 271 272 /* ip_match_execute - byte-code matching engine */ 273 274 int ip_match_execute(const char *byte_codes, const char *addr_bytes) 275 { 276 const char *myname = "ip_match_execute"; 277 const unsigned char *bp; 278 const unsigned char *ap; 279 int octet_count = 0; 280 int ch; 281 int matched; 282 283 /* 284 * Sanity check. Use different execute loops for AF_INET and AF_INET6. 285 */ 286 if (*byte_codes != AF_INET) 287 msg_panic("%s: malformed byte-code header (decimal %d)", 288 myname, *(const unsigned char *) byte_codes); 289 290 /* 291 * Match the address bytes against the byte codes. Avoid problems with 292 * (char -> int) sign extension on architectures with signed characters. 293 */ 294 bp = (const unsigned char *) byte_codes + 1; 295 ap = (const unsigned char *) addr_bytes; 296 297 for (octet_count = 0; octet_count < 4; octet_count++, ap++) { 298 299 /* 300 * Simple numeric field. 301 */ 302 if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { 303 if (*ap == *bp) 304 bp += 1; 305 else 306 return (0); 307 } 308 309 /* 310 * Wild-card numeric field. 311 */ 312 else if (ch == IP_MATCH_CODE_OPEN) { 313 matched = 0; 314 for (;;) { 315 /* Numeric range. */ 316 if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { 317 if (!matched) 318 matched = (*ap >= bp[0] && *ap <= bp[1]); 319 bp += 2; 320 } 321 /* Number. */ 322 else if (ch == IP_MATCH_CODE_OVAL) { 323 if (!matched) 324 matched = (*ap == *bp); 325 bp += 1; 326 } 327 /* End-of-wildcard. */ 328 else if (ch == IP_MATCH_CODE_CLOSE) { 329 break; 330 } 331 /* Corruption. */ 332 else { 333 size_t len = (const char *) bp - byte_codes - 1; 334 335 msg_panic("%s: unexpected byte code (decimal %d) " 336 "after \"%s\"", myname, ch, 337 ip_match_print_code_prefix(byte_codes, len)); 338 } 339 } 340 if (matched == 0) 341 return (0); 342 } 343 344 /* 345 * Corruption. 346 */ 347 else { 348 size_t len = (const char *) bp - byte_codes - 1; 349 350 msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", 351 myname, ch, ip_match_print_code_prefix(byte_codes, len)); 352 } 353 } 354 return (1); 355 } 356 357 /* ip_match_next_token - carve out the next token from input pattern */ 358 359 static int ip_match_next_token(char **pstart, char **psaved_start, int *poval) 360 { 361 unsigned char *cp; 362 int oval; /* octet value */ 363 int type; /* token value */ 364 365 /* 366 * Return a literal, error, or EOF token. Update the read pointer to the 367 * start of the next token or leave it at the string terminator. 368 */ 369 #define IP_MATCH_RETURN_TOK(next, type) \ 370 do { *pstart = (char *) (next); return (type); } while (0) 371 372 /* 373 * Return a token that contains an IPv4 address octet value. 374 */ 375 #define IP_MATCH_RETURN_TOK_VAL(next, type, oval) do { \ 376 *poval = (oval); IP_MATCH_RETURN_TOK((next), type); \ 377 } while (0) 378 379 /* 380 * Light-weight tokenizer. Each result is an IPv4 address octet value, a 381 * literal character value, error, or EOF. 382 */ 383 *psaved_start = *pstart; 384 cp = (unsigned char *) *pstart; 385 if (ISDIGIT(*cp)) { 386 oval = *cp - '0'; 387 type = IP_MATCH_CODE_OVAL; 388 for (cp += 1; ISDIGIT(*cp); cp++) { 389 oval *= 10; 390 oval += *cp - '0'; 391 if (oval > 255) 392 type = IP_MATCH_CODE_ERR; 393 } 394 IP_MATCH_RETURN_TOK_VAL(cp, type, oval); 395 } else { 396 IP_MATCH_RETURN_TOK(*cp ? cp + 1 : cp, *cp); 397 } 398 } 399 400 /* ipmatch_print_parse_error - formatted parsing error, with context */ 401 402 static void PRINTFLIKE(5, 6) ipmatch_print_parse_error(VSTRING *reply, 403 char *start, 404 char *here, 405 char *next, 406 const char *fmt,...) 407 { 408 va_list ap; 409 int start_width; 410 int here_width; 411 412 /* 413 * Format the error type. 414 */ 415 va_start(ap, fmt); 416 vstring_vsprintf(reply, fmt, ap); 417 va_end(ap); 418 419 /* 420 * Format the error context. The syntax is complex enough that it is 421 * worth the effort to precisely indicate what input is in error. 422 * 423 * XXX Workaround for %.*s to avoid output when a zero width is specified. 424 */ 425 if (start != 0) { 426 start_width = here - start; 427 here_width = next - here; 428 vstring_sprintf_append(reply, " at \"%.*s>%.*s<%s\"", 429 start_width, start_width == 0 ? "" : start, 430 here_width, here_width == 0 ? "" : here, next); 431 } 432 } 433 434 /* ip_match_parse - parse an entire wild-card address pattern */ 435 436 char *ip_match_parse(VSTRING *byte_codes, char *pattern) 437 { 438 int octet_count; 439 char *saved_cp; 440 char *cp; 441 int token_type; 442 int look_ahead; 443 int oval; 444 int saved_oval; 445 446 /* 447 * Simplify this if we change to {} for wildcard notation. 448 */ 449 #define FIND_TERMINATOR(start, cp) do { \ 450 int _level = 0; \ 451 for (cp = (start) ; *cp; cp++) { \ 452 if (*cp == '[') _level++; \ 453 if (*cp != ']') continue; \ 454 if (--_level == 0) break; \ 455 } \ 456 } while (0) 457 458 /* 459 * Strip [] around the entire pattern. 460 */ 461 if (*pattern == '[') { 462 FIND_TERMINATOR(pattern, cp); 463 if (cp[0] == 0) { 464 vstring_sprintf(byte_codes, "missing \"]\" character"); 465 return (STR(byte_codes)); 466 } 467 if (cp[1] == 0) { 468 *cp = 0; 469 pattern += 1; 470 } 471 } 472 473 /* 474 * Sanity check. In this case we can't show any error context. 475 */ 476 if (*pattern == 0) { 477 vstring_sprintf(byte_codes, "empty address pattern"); 478 return (STR(byte_codes)); 479 } 480 481 /* 482 * Simple parser with on-the-fly encoding. For now, IPv4 support only. 483 * Use different parser loops for IPv4 and IPv6. 484 */ 485 VSTRING_RESET(byte_codes); 486 VSTRING_ADDCH(byte_codes, AF_INET); 487 octet_count = 0; 488 cp = pattern; 489 490 /* 491 * Require four address fields separated by ".", each field containing a 492 * numeric octet value or a sequence inside []. The loop head has no test 493 * and does not step the loop variable. The tokenizer advances the loop 494 * variable, and the loop termination logic is inside the loop. 495 */ 496 for (;;) { 497 switch (token_type = ip_match_next_token(&cp, &saved_cp, &oval)) { 498 499 /* 500 * Numeric address field. 501 */ 502 case IP_MATCH_CODE_OVAL: 503 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); 504 VSTRING_ADDCH(byte_codes, oval); 505 break; 506 507 /* 508 * Wild-card address field. 509 */ 510 case IP_MATCH_CODE_OPEN: 511 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OPEN); 512 /* Require ";"-separated numbers or numeric ranges. */ 513 for (;;) { 514 token_type = ip_match_next_token(&cp, &saved_cp, &oval); 515 if (token_type == IP_MATCH_CODE_OVAL) { 516 saved_oval = oval; 517 look_ahead = ip_match_next_token(&cp, &saved_cp, &oval); 518 /* Numeric range. */ 519 if (look_ahead == '.') { 520 /* Brute-force parsing. */ 521 if (ip_match_next_token(&cp, &saved_cp, &oval) == '.' 522 && ip_match_next_token(&cp, &saved_cp, &oval) 523 == IP_MATCH_CODE_OVAL 524 && saved_oval <= oval) { 525 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_RANGE); 526 VSTRING_ADDCH(byte_codes, saved_oval); 527 VSTRING_ADDCH(byte_codes, oval); 528 look_ahead = 529 ip_match_next_token(&cp, &saved_cp, &oval); 530 } else { 531 ipmatch_print_parse_error(byte_codes, pattern, 532 saved_cp, cp, 533 "numeric range error"); 534 return (STR(byte_codes)); 535 } 536 } 537 /* Single number. */ 538 else { 539 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); 540 VSTRING_ADDCH(byte_codes, saved_oval); 541 } 542 /* Require ";" or end-of-wildcard. */ 543 token_type = look_ahead; 544 if (token_type == ';') { 545 continue; 546 } else if (token_type == IP_MATCH_CODE_CLOSE) { 547 break; 548 } else { 549 ipmatch_print_parse_error(byte_codes, pattern, 550 saved_cp, cp, 551 "need \";\" or \"%c\"", 552 IP_MATCH_CODE_CLOSE); 553 return (STR(byte_codes)); 554 } 555 } else { 556 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 557 "need decimal number 0..255"); 558 return (STR(byte_codes)); 559 } 560 } 561 VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_CLOSE); 562 break; 563 564 /* 565 * Invalid field. 566 */ 567 default: 568 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 569 "need decimal number 0..255 or \"%c\"", 570 IP_MATCH_CODE_OPEN); 571 return (STR(byte_codes)); 572 } 573 octet_count += 1; 574 575 /* 576 * Require four address fields. Not one more, not one less. 577 */ 578 if (octet_count == 4) { 579 if (*cp != 0) { 580 (void) ip_match_next_token(&cp, &saved_cp, &oval); 581 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 582 "garbage after pattern"); 583 return (STR(byte_codes)); 584 } 585 VSTRING_ADDCH(byte_codes, 0); 586 return (0); 587 } 588 589 /* 590 * Require "." before the next address field. 591 */ 592 if (ip_match_next_token(&cp, &saved_cp, &oval) != '.') { 593 ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, 594 "need \".\""); 595 return (STR(byte_codes)); 596 } 597 } 598 } 599 600 #ifdef TEST 601 602 /* 603 * Dummy main program for regression tests. 604 */ 605 #include <sys/socket.h> 606 #include <netinet/in.h> 607 #include <arpa/inet.h> 608 #include <stdlib.h> 609 #include <unistd.h> 610 #include <vstream.h> 611 #include <vstring_vstream.h> 612 #include <stringops.h> 613 614 int main(int argc, char **argv) 615 { 616 VSTRING *byte_codes = vstring_alloc(100); 617 VSTRING *line_buf = vstring_alloc(100); 618 char *bufp; 619 char *err; 620 char *user_pattern; 621 char *user_address; 622 int echo_input = !isatty(0); 623 624 /* 625 * Iterate over the input stream. The input format is a pattern, followed 626 * by optional addresses to match against. 627 */ 628 while (vstring_fgets_nonl(line_buf, VSTREAM_IN)) { 629 bufp = STR(line_buf); 630 if (echo_input) { 631 vstream_printf("> %s\n", bufp); 632 vstream_fflush(VSTREAM_OUT); 633 } 634 if (*bufp == '#') 635 continue; 636 if ((user_pattern = mystrtok(&bufp, " \t")) == 0) 637 continue; 638 639 /* 640 * Parse and dump the pattern. 641 */ 642 if ((err = ip_match_parse(byte_codes, user_pattern)) != 0) { 643 vstream_printf("Error: %s\n", err); 644 } else { 645 vstream_printf("Code: %s\n", 646 ip_match_dump(line_buf, STR(byte_codes))); 647 } 648 vstream_fflush(VSTREAM_OUT); 649 650 /* 651 * Match the optional patterns. 652 */ 653 while ((user_address = mystrtok(&bufp, " \t")) != 0) { 654 struct in_addr netw_addr; 655 656 switch (inet_pton(AF_INET, user_address, &netw_addr)) { 657 case 1: 658 vstream_printf("Match %s: %s\n", user_address, 659 ip_match_execute(STR(byte_codes), 660 (char *) &netw_addr.s_addr) ? 661 "yes" : "no"); 662 break; 663 case 0: 664 vstream_printf("bad address syntax: %s\n", user_address); 665 break; 666 case -1: 667 vstream_printf("%s: %m\n", user_address); 668 break; 669 } 670 vstream_fflush(VSTREAM_OUT); 671 } 672 } 673 vstring_free(line_buf); 674 vstring_free(byte_codes); 675 exit(0); 676 } 677 678 #endif 679