1 /* $NetBSD: optimize.c,v 1.11 2019/10/01 16:02:11 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that: (1) source code distributions 9 * retain the above copyright notice and this paragraph in its entirety, (2) 10 * distributions including binary code include the above copyright notice and 11 * this paragraph in its entirety in the documentation or other materials 12 * provided with the distribution, and (3) all advertising materials mentioning 13 * features or use of this software display the following acknowledgement: 14 * ``This product includes software developed by the University of California, 15 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of 16 * the University nor the names of its contributors may be used to endorse 17 * or promote products derived from this software without specific prior 18 * written permission. 19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 20 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 22 * 23 * Optimization module for BPF code intermediate representation. 24 */ 25 26 #include <sys/cdefs.h> 27 __RCSID("$NetBSD: optimize.c,v 1.11 2019/10/01 16:02:11 christos Exp $"); 28 29 #ifdef HAVE_CONFIG_H 30 #include <config.h> 31 #endif 32 33 #include <pcap-types.h> 34 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <memory.h> 38 #include <setjmp.h> 39 #include <string.h> 40 41 #include <errno.h> 42 43 #include "pcap-int.h" 44 45 #include "gencode.h" 46 #include "optimize.h" 47 48 #ifdef HAVE_OS_PROTO_H 49 #include "os-proto.h" 50 #endif 51 52 #ifdef BDEBUG 53 /* 54 * The internal "debug printout" flag for the filter expression optimizer. 55 * The code to print that stuff is present only if BDEBUG is defined, so 56 * the flag, and the routine to set it, are defined only if BDEBUG is 57 * defined. 58 */ 59 static int pcap_optimizer_debug; 60 61 /* 62 * Routine to set that flag. 63 * 64 * This is intended for libpcap developers, not for general use. 65 * If you want to set these in a program, you'll have to declare this 66 * routine yourself, with the appropriate DLL import attribute on Windows; 67 * it's not declared in any header file, and won't be declared in any 68 * header file provided by libpcap. 69 */ 70 PCAP_API void pcap_set_optimizer_debug(int value); 71 72 PCAP_API_DEF void 73 pcap_set_optimizer_debug(int value) 74 { 75 pcap_optimizer_debug = value; 76 } 77 78 /* 79 * The internal "print dot graph" flag for the filter expression optimizer. 80 * The code to print that stuff is present only if BDEBUG is defined, so 81 * the flag, and the routine to set it, are defined only if BDEBUG is 82 * defined. 83 */ 84 static int pcap_print_dot_graph; 85 86 /* 87 * Routine to set that flag. 88 * 89 * This is intended for libpcap developers, not for general use. 90 * If you want to set these in a program, you'll have to declare this 91 * routine yourself, with the appropriate DLL import attribute on Windows; 92 * it's not declared in any header file, and won't be declared in any 93 * header file provided by libpcap. 
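 *
 * For instance (an illustrative sketch, not a supported interface),
 * a developer's test program might declare and call it roughly like
 * this, adding the appropriate DLL import attribute when building
 * against a libpcap DLL on Windows:
 *
 *	extern void pcap_set_print_dot_graph(int);
 *
 *	pcap_set_print_dot_graph(1);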
94 */ 95 PCAP_API void pcap_set_print_dot_graph(int value); 96 97 PCAP_API_DEF void 98 pcap_set_print_dot_graph(int value) 99 { 100 pcap_print_dot_graph = value; 101 } 102 103 #endif 104 105 /* 106 * lowest_set_bit(). 107 * 108 * Takes a 32-bit integer as an argument. 109 * 110 * If handed a non-zero value, returns the index of the lowest set bit, 111 * counting upwards from zero. 112 * 113 * If handed zero, the results are platform- and compiler-dependent. 114 * Keep it out of the light, don't give it any water, don't feed it 115 * after midnight, and don't pass zero to it. 116 * 117 * This is the same as the count of trailing zeroes in the word. 118 */ 119 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4) 120 /* 121 * GCC 3.4 and later; we have __builtin_ctz(). 122 */ 123 #define lowest_set_bit(mask) __builtin_ctz(mask) 124 #elif defined(_MSC_VER) 125 /* 126 * Visual Studio; we support only 2005 and later, so use 127 * _BitScanForward(). 128 */ 129 #include <intrin.h> 130 131 #ifndef __clang__ 132 #pragma intrinsic(_BitScanForward) 133 #endif 134 135 static __forceinline int 136 lowest_set_bit(int mask) 137 { 138 unsigned long bit; 139 140 /* 141 * Don't sign-extend mask if long is longer than int. 142 * (It's currently not, in MSVC, even on 64-bit platforms, but....) 143 */ 144 if (_BitScanForward(&bit, (unsigned int)mask) == 0) 145 return -1; /* mask is zero */ 146 return (int)bit; 147 } 148 #elif defined(MSDOS) && defined(__DJGPP__) 149 /* 150 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which 151 * we've already included. 152 */ 153 #define lowest_set_bit(mask) (ffs((mask)) - 1) 154 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS) 155 /* 156 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there, 157 * or some other platform (UN*X conforming to a sufficiently recent version 158 * of the Single UNIX Specification). 159 */ 160 #include <strings.h> 161 #define lowest_set_bit(mask) (ffs((mask)) - 1) 162 #else 163 /* 164 * None of the above. 165 * Use a perfect-hash-function-based function. 166 */ 167 static int 168 lowest_set_bit(int mask) 169 { 170 unsigned int v = (unsigned int)mask; 171 172 static const int MultiplyDeBruijnBitPosition[32] = { 173 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 174 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 175 }; 176 177 /* 178 * We strip off all but the lowermost set bit (v & -v), 179 * and perform a minimal perfect hash on it to look up the 180 * number of low-order zero bits in a table. 181 * 182 * See: 183 * 184 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf 185 * 186 * http://supertech.csail.mit.edu/papers/debruijn.pdf 187 */ 188 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]); 189 } 190 #endif 191 192 /* 193 * Represents a deleted instruction. 194 */ 195 #define NOP -1 196 197 /* 198 * Register numbers for use-def values. 199 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory 200 * location. A_ATOM is the accumulator and X_ATOM is the index 201 * register. 202 */ 203 #define A_ATOM BPF_MEMWORDS 204 #define X_ATOM (BPF_MEMWORDS+1) 205 206 /* 207 * This define is used to represent *both* the accumulator and 208 * x register in use-def computations. 209 * Currently, the use-def code assumes only one definition per instruction. 210 */ 211 #define AX_ATOM N_ATOMS 212 213 /* 214 * These data structures are used in a Cocke and Schwartz style 215 * value numbering scheme.
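 * (For illustration: F() below hashes an (opcode, operand value,
 * operand value) triple to a small integer, so two computations of
 * the same thing, e.g. two "ld #20" statements, are given the same
 * value number, which is what later lets vstore() turn a redundant
 * one into a NOP.)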
Since the flowgraph is acyclic, 216 * exit values can be propagated from a node's predecessors 217 * provided it is uniquely defined. 218 */ 219 struct valnode { 220 int code; 221 int v0, v1; 222 int val; 223 struct valnode *next; 224 }; 225 226 /* Integer constants mapped with the load immediate opcode. */ 227 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L) 228 229 struct vmapinfo { 230 int is_const; 231 bpf_int32 const_val; 232 }; 233 234 typedef struct { 235 /* 236 * Place to longjmp to on an error. 237 */ 238 jmp_buf top_ctx; 239 240 /* 241 * The buffer into which to put error message. 242 */ 243 char *errbuf; 244 245 /* 246 * A flag to indicate that further optimization is needed. 247 * Iterative passes are continued until a given pass yields no 248 * branch movement. 249 */ 250 int done; 251 252 int n_blocks; 253 struct block **blocks; 254 int n_edges; 255 struct edge **edges; 256 257 /* 258 * A bit vector set representation of the dominators. 259 * We round up the set size to the next power of two. 260 */ 261 int nodewords; 262 int edgewords; 263 struct block **levels; 264 bpf_u_int32 *space; 265 266 #define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 267 /* 268 * True if a is in uset {p} 269 */ 270 #define SET_MEMBER(p, a) \ 271 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))) 272 273 /* 274 * Add 'a' to uset p. 275 */ 276 #define SET_INSERT(p, a) \ 277 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)) 278 279 /* 280 * Delete 'a' from uset p. 281 */ 282 #define SET_DELETE(p, a) \ 283 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)) 284 285 /* 286 * a := a intersect b 287 */ 288 #define SET_INTERSECT(a, b, n)\ 289 {\ 290 register bpf_u_int32 *_x = a, *_y = b;\ 291 register int _n = n;\ 292 while (--_n >= 0) *_x++ &= *_y++;\ 293 } 294 295 /* 296 * a := a - b 297 */ 298 #define SET_SUBTRACT(a, b, n)\ 299 {\ 300 register bpf_u_int32 *_x = a, *_y = b;\ 301 register int _n = n;\ 302 while (--_n >= 0) *_x++ &=~ *_y++;\ 303 } 304 305 /* 306 * a := a union b 307 */ 308 #define SET_UNION(a, b, n)\ 309 {\ 310 register bpf_u_int32 *_x = a, *_y = b;\ 311 register int _n = n;\ 312 while (--_n >= 0) *_x++ |= *_y++;\ 313 } 314 315 uset all_dom_sets; 316 uset all_closure_sets; 317 uset all_edge_sets; 318 319 #define MODULUS 213 320 struct valnode *hashtbl[MODULUS]; 321 int curval; 322 int maxval; 323 324 struct vmapinfo *vmap; 325 struct valnode *vnode_base; 326 struct valnode *next_vnode; 327 } opt_state_t; 328 329 typedef struct { 330 /* 331 * Place to longjmp to on an error. 332 */ 333 jmp_buf top_ctx; 334 335 /* 336 * The buffer into which to put error message. 337 */ 338 char *errbuf; 339 340 /* 341 * Some pointers used to convert the basic block form of the code, 342 * into the array form that BPF requires. 'fstart' will point to 343 * the malloc'd array while 'ftail' is used during the recursive 344 * traversal. 345 */ 346 struct bpf_insn *fstart; 347 struct bpf_insn *ftail; 348 } conv_state_t; 349 350 static void opt_init(opt_state_t *, struct icode *); 351 static void opt_cleanup(opt_state_t *); 352 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...) 
353 PCAP_PRINTFLIKE(2, 3); 354 355 static void intern_blocks(opt_state_t *, struct icode *); 356 357 static void find_inedges(opt_state_t *, struct block *); 358 #ifdef BDEBUG 359 static void opt_dump(opt_state_t *, struct icode *); 360 #endif 361 362 #ifndef MAX 363 #define MAX(a,b) ((a)>(b)?(a):(b)) 364 #endif 365 366 static void 367 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b) 368 { 369 int level; 370 371 if (isMarked(ic, b)) 372 return; 373 374 Mark(ic, b); 375 b->link = 0; 376 377 if (JT(b)) { 378 find_levels_r(opt_state, ic, JT(b)); 379 find_levels_r(opt_state, ic, JF(b)); 380 level = MAX(JT(b)->level, JF(b)->level) + 1; 381 } else 382 level = 0; 383 b->level = level; 384 b->link = opt_state->levels[level]; 385 opt_state->levels[level] = b; 386 } 387 388 /* 389 * Level graph. The levels go from 0 at the leaves to 390 * N_LEVELS at the root. The opt_state->levels[] array points to the 391 * first node of the level list, whose elements are linked 392 * with the 'link' field of the struct block. 393 */ 394 static void 395 find_levels(opt_state_t *opt_state, struct icode *ic) 396 { 397 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels)); 398 unMarkAll(ic); 399 find_levels_r(opt_state, ic, ic->root); 400 } 401 402 /* 403 * Find dominator relationships. 404 * Assumes graph has been leveled. 405 */ 406 static void 407 find_dom(opt_state_t *opt_state, struct block *root) 408 { 409 int i; 410 struct block *b; 411 bpf_u_int32 *x; 412 413 /* 414 * Initialize sets to contain all nodes. 415 */ 416 x = opt_state->all_dom_sets; 417 i = opt_state->n_blocks * opt_state->nodewords; 418 while (--i >= 0) 419 *x++ = 0xFFFFFFFFU; 420 /* Root starts off empty. */ 421 for (i = opt_state->nodewords; --i >= 0;) 422 root->dom[i] = 0; 423 424 /* root->level is the highest level no found. */ 425 for (i = root->level; i >= 0; --i) { 426 for (b = opt_state->levels[i]; b; b = b->link) { 427 SET_INSERT(b->dom, b->id); 428 if (JT(b) == 0) 429 continue; 430 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords); 431 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords); 432 } 433 } 434 } 435 436 static void 437 propedom(opt_state_t *opt_state, struct edge *ep) 438 { 439 SET_INSERT(ep->edom, ep->id); 440 if (ep->succ) { 441 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords); 442 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords); 443 } 444 } 445 446 /* 447 * Compute edge dominators. 448 * Assumes graph has been leveled and predecessors established. 449 */ 450 static void 451 find_edom(opt_state_t *opt_state, struct block *root) 452 { 453 int i; 454 uset x; 455 struct block *b; 456 457 x = opt_state->all_edge_sets; 458 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; ) 459 x[i] = 0xFFFFFFFFU; 460 461 /* root->level is the highest level no found. */ 462 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0)); 463 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0)); 464 for (i = root->level; i >= 0; --i) { 465 for (b = opt_state->levels[i]; b != 0; b = b->link) { 466 propedom(opt_state, &b->et); 467 propedom(opt_state, &b->ef); 468 } 469 } 470 } 471 472 /* 473 * Find the backwards transitive closure of the flow graph. These sets 474 * are backwards in the sense that we find the set of nodes that reach 475 * a given node, not the set of nodes that can be reached by a node. 476 * 477 * Assumes graph has been leveled. 
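 *
 * As a small illustration, in a graph whose root branches to blocks
 * b1 and b2, both of which then branch to a common block b3, the
 * closure set of b3 ends up holding root, b1, b2 and b3 itself,
 * while the closure set of b1 holds only root and b1.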
478 */ 479 static void 480 find_closure(opt_state_t *opt_state, struct block *root) 481 { 482 int i; 483 struct block *b; 484 485 /* 486 * Initialize sets to contain no nodes. 487 */ 488 memset((char *)opt_state->all_closure_sets, 0, 489 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets)); 490 491 /* root->level is the highest level no found. */ 492 for (i = root->level; i >= 0; --i) { 493 for (b = opt_state->levels[i]; b; b = b->link) { 494 SET_INSERT(b->closure, b->id); 495 if (JT(b) == 0) 496 continue; 497 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords); 498 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords); 499 } 500 } 501 } 502 503 /* 504 * Return the register number that is used by s. If A and X are both 505 * used, return AX_ATOM. If no register is used, return -1. 506 * 507 * The implementation should probably change to an array access. 508 */ 509 static int 510 atomuse(struct stmt *s) 511 { 512 register int c = s->code; 513 514 if (c == NOP) 515 return -1; 516 517 switch (BPF_CLASS(c)) { 518 519 case BPF_RET: 520 return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 521 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 522 523 case BPF_LD: 524 case BPF_LDX: 525 return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 526 (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 527 528 case BPF_ST: 529 return A_ATOM; 530 531 case BPF_STX: 532 return X_ATOM; 533 534 case BPF_JMP: 535 case BPF_ALU: 536 if (BPF_SRC(c) == BPF_X) 537 return AX_ATOM; 538 return A_ATOM; 539 540 case BPF_MISC: 541 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 542 } 543 abort(); 544 /* NOTREACHED */ 545 } 546 547 /* 548 * Return the register number that is defined by 's'. We assume that 549 * a single stmt cannot define more than one register. If no register 550 * is defined, return -1. 551 * 552 * The implementation should probably change to an array access. 553 */ 554 static int 555 atomdef(struct stmt *s) 556 { 557 if (s->code == NOP) 558 return -1; 559 560 switch (BPF_CLASS(s->code)) { 561 562 case BPF_LD: 563 case BPF_ALU: 564 return A_ATOM; 565 566 case BPF_LDX: 567 return X_ATOM; 568 569 case BPF_ST: 570 case BPF_STX: 571 return s->k; 572 573 case BPF_MISC: 574 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 575 } 576 return -1; 577 } 578 579 /* 580 * Compute the sets of registers used, defined, and killed by 'b'. 581 * 582 * "Used" means that a statement in 'b' uses the register before any 583 * statement in 'b' defines it, i.e. it uses the value left in 584 * that register by a predecessor block of this block. 585 * "Defined" means that a statement in 'b' defines it. 586 * "Killed" means that a statement in 'b' defines it before any 587 * statement in 'b' uses it, i.e. it kills the value left in that 588 * register by a predecessor block of this block. 
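 *
 * For example, in a (purely illustrative) block containing
 *
 *	tax
 *	ld #1
 *
 * the "tax" uses the accumulator before anything in the block defines
 * it, so A_ATOM lands in the "used" set; it also defines the index
 * register before anything uses it, so X_ATOM lands in both the
 * "defined" and "killed" sets.  The later "ld #1" adds A_ATOM to
 * "defined" but not to "killed", because A was already used within
 * the block.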
589 */ 590 static void 591 compute_local_ud(struct block *b) 592 { 593 struct slist *s; 594 atomset def = 0, use = 0, killed = 0; 595 int atom; 596 597 for (s = b->stmts; s; s = s->next) { 598 if (s->s.code == NOP) 599 continue; 600 atom = atomuse(&s->s); 601 if (atom >= 0) { 602 if (atom == AX_ATOM) { 603 if (!ATOMELEM(def, X_ATOM)) 604 use |= ATOMMASK(X_ATOM); 605 if (!ATOMELEM(def, A_ATOM)) 606 use |= ATOMMASK(A_ATOM); 607 } 608 else if (atom < N_ATOMS) { 609 if (!ATOMELEM(def, atom)) 610 use |= ATOMMASK(atom); 611 } 612 else 613 abort(); 614 } 615 atom = atomdef(&s->s); 616 if (atom >= 0) { 617 if (!ATOMELEM(use, atom)) 618 killed |= ATOMMASK(atom); 619 def |= ATOMMASK(atom); 620 } 621 } 622 if (BPF_CLASS(b->s.code) == BPF_JMP) { 623 /* 624 * XXX - what about RET? 625 */ 626 atom = atomuse(&b->s); 627 if (atom >= 0) { 628 if (atom == AX_ATOM) { 629 if (!ATOMELEM(def, X_ATOM)) 630 use |= ATOMMASK(X_ATOM); 631 if (!ATOMELEM(def, A_ATOM)) 632 use |= ATOMMASK(A_ATOM); 633 } 634 else if (atom < N_ATOMS) { 635 if (!ATOMELEM(def, atom)) 636 use |= ATOMMASK(atom); 637 } 638 else 639 abort(); 640 } 641 } 642 643 b->def = def; 644 b->kill = killed; 645 b->in_use = use; 646 } 647 648 /* 649 * Assume graph is already leveled. 650 */ 651 static void 652 find_ud(opt_state_t *opt_state, struct block *root) 653 { 654 int i, maxlevel; 655 struct block *p; 656 657 /* 658 * root->level is the highest level no found; 659 * count down from there. 660 */ 661 maxlevel = root->level; 662 for (i = maxlevel; i >= 0; --i) 663 for (p = opt_state->levels[i]; p; p = p->link) { 664 compute_local_ud(p); 665 p->out_use = 0; 666 } 667 668 for (i = 1; i <= maxlevel; ++i) { 669 for (p = opt_state->levels[i]; p; p = p->link) { 670 p->out_use |= JT(p)->in_use | JF(p)->in_use; 671 p->in_use |= p->out_use &~ p->kill; 672 } 673 } 674 } 675 static void 676 init_val(opt_state_t *opt_state) 677 { 678 opt_state->curval = 0; 679 opt_state->next_vnode = opt_state->vnode_base; 680 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap)); 681 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl); 682 } 683 684 /* Because we really don't have an IR, this stuff is a little messy. */ 685 static int 686 F(opt_state_t *opt_state, int code, int v0, int v1) 687 { 688 u_int hash; 689 int val; 690 struct valnode *p; 691 692 hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8); 693 hash %= MODULUS; 694 695 for (p = opt_state->hashtbl[hash]; p; p = p->next) 696 if (p->code == code && p->v0 == v0 && p->v1 == v1) 697 return p->val; 698 699 val = ++opt_state->curval; 700 if (BPF_MODE(code) == BPF_IMM && 701 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 702 opt_state->vmap[val].const_val = v0; 703 opt_state->vmap[val].is_const = 1; 704 } 705 p = opt_state->next_vnode++; 706 p->val = val; 707 p->code = code; 708 p->v0 = v0; 709 p->v1 = v1; 710 p->next = opt_state->hashtbl[hash]; 711 opt_state->hashtbl[hash] = p; 712 713 return val; 714 } 715 716 static inline void 717 vstore(struct stmt *s, int *valp, int newval, int alter) 718 { 719 if (alter && newval != VAL_UNKNOWN && *valp == newval) 720 s->code = NOP; 721 else 722 *valp = newval; 723 } 724 725 /* 726 * Do constant-folding on binary operators. 727 * (Unary operators are handled elsewhere.) 
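 *
 * For example, if value numbering has shown that the accumulator
 * holds the constant 6 when an "or #1" is reached, fold_op() computes
 * 6|1 and rewrites the statement as "ld #7", i.e. the ALU operation
 * is replaced by a load immediate of the folded result.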
728 */ 729 static void 730 fold_op(opt_state_t *opt_state, struct stmt *s, int v0, int v1) 731 { 732 bpf_u_int32 a, b; 733 734 a = opt_state->vmap[v0].const_val; 735 b = opt_state->vmap[v1].const_val; 736 737 switch (BPF_OP(s->code)) { 738 case BPF_ADD: 739 a += b; 740 break; 741 742 case BPF_SUB: 743 a -= b; 744 break; 745 746 case BPF_MUL: 747 a *= b; 748 break; 749 750 case BPF_DIV: 751 if (b == 0) 752 opt_error(opt_state, "division by zero"); 753 a /= b; 754 break; 755 756 case BPF_MOD: 757 if (b == 0) 758 opt_error(opt_state, "modulus by zero"); 759 a %= b; 760 break; 761 762 case BPF_AND: 763 a &= b; 764 break; 765 766 case BPF_OR: 767 a |= b; 768 break; 769 770 case BPF_XOR: 771 a ^= b; 772 break; 773 774 case BPF_LSH: 775 /* 776 * A left shift of more than the width of the type 777 * is undefined in C; we'll just treat it as shifting 778 * all the bits out. 779 * 780 * XXX - the BPF interpreter doesn't check for this, 781 * so its behavior is dependent on the behavior of 782 * the processor on which it's running. There are 783 * processors on which it shifts all the bits out 784 * and processors on which it does no shift. 785 */ 786 if (b < 32) 787 a <<= b; 788 else 789 a = 0; 790 break; 791 792 case BPF_RSH: 793 /* 794 * A right shift of more than the width of the type 795 * is undefined in C; we'll just treat it as shifting 796 * all the bits out. 797 * 798 * XXX - the BPF interpreter doesn't check for this, 799 * so its behavior is dependent on the behavior of 800 * the processor on which it's running. There are 801 * processors on which it shifts all the bits out 802 * and processors on which it does no shift. 803 */ 804 if (b < 32) 805 a >>= b; 806 else 807 a = 0; 808 break; 809 810 default: 811 abort(); 812 } 813 s->k = a; 814 s->code = BPF_LD|BPF_IMM; 815 opt_state->done = 0; 816 } 817 818 static inline struct slist * 819 this_op(struct slist *s) 820 { 821 while (s != 0 && s->s.code == NOP) 822 s = s->next; 823 return s; 824 } 825 826 static void 827 opt_not(struct block *b) 828 { 829 struct block *tmp = JT(b); 830 831 JT(b) = JF(b); 832 JF(b) = tmp; 833 } 834 835 static void 836 opt_peep(opt_state_t *opt_state, struct block *b) 837 { 838 struct slist *s; 839 struct slist *next, *last; 840 int val; 841 842 s = b->stmts; 843 if (s == 0) 844 return; 845 846 last = s; 847 for (/*empty*/; /*empty*/; s = next) { 848 /* 849 * Skip over nops. 850 */ 851 s = this_op(s); 852 if (s == 0) 853 break; /* nothing left in the block */ 854 855 /* 856 * Find the next real instruction after that one 857 * (skipping nops). 858 */ 859 next = this_op(s->next); 860 if (next == 0) 861 break; /* no next instruction */ 862 last = next; 863 864 /* 865 * st M[k] --> st M[k] 866 * ldx M[k] tax 867 */ 868 if (s->s.code == BPF_ST && 869 next->s.code == (BPF_LDX|BPF_MEM) && 870 s->s.k == next->s.k) { 871 opt_state->done = 0; 872 next->s.code = BPF_MISC|BPF_TAX; 873 } 874 /* 875 * ld #k --> ldx #k 876 * tax txa 877 */ 878 if (s->s.code == (BPF_LD|BPF_IMM) && 879 next->s.code == (BPF_MISC|BPF_TAX)) { 880 s->s.code = BPF_LDX|BPF_IMM; 881 next->s.code = BPF_MISC|BPF_TXA; 882 opt_state->done = 0; 883 } 884 /* 885 * This is an ugly special case, but it happens 886 * when you say tcp[k] or udp[k] where k is a constant. 887 */ 888 if (s->s.code == (BPF_LD|BPF_IMM)) { 889 struct slist *add, *tax, *ild; 890 891 /* 892 * Check that X isn't used on exit from this 893 * block (which the optimizer might cause). 894 * We know the code generator won't generate 895 * any local dependencies. 
896 */ 897 if (ATOMELEM(b->out_use, X_ATOM)) 898 continue; 899 900 /* 901 * Check that the instruction following the ldi 902 * is an addx, or it's an ldxms with an addx 903 * following it (with 0 or more nops between the 904 * ldxms and addx). 905 */ 906 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 907 add = next; 908 else 909 add = this_op(next->next); 910 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 911 continue; 912 913 /* 914 * Check that a tax follows that (with 0 or more 915 * nops between them). 916 */ 917 tax = this_op(add->next); 918 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 919 continue; 920 921 /* 922 * Check that an ild follows that (with 0 or more 923 * nops between them). 924 */ 925 ild = this_op(tax->next); 926 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 927 BPF_MODE(ild->s.code) != BPF_IND) 928 continue; 929 /* 930 * We want to turn this sequence: 931 * 932 * (004) ldi #0x2 {s} 933 * (005) ldxms [14] {next} -- optional 934 * (006) addx {add} 935 * (007) tax {tax} 936 * (008) ild [x+0] {ild} 937 * 938 * into this sequence: 939 * 940 * (004) nop 941 * (005) ldxms [14] 942 * (006) nop 943 * (007) nop 944 * (008) ild [x+2] 945 * 946 * XXX We need to check that X is not 947 * subsequently used, because we want to change 948 * what'll be in it after this sequence. 949 * 950 * We know we can eliminate the accumulator 951 * modifications earlier in the sequence since 952 * it is defined by the last stmt of this sequence 953 * (i.e., the last statement of the sequence loads 954 * a value into the accumulator, so we can eliminate 955 * earlier operations on the accumulator). 956 */ 957 ild->s.k += s->s.k; 958 s->s.code = NOP; 959 add->s.code = NOP; 960 tax->s.code = NOP; 961 opt_state->done = 0; 962 } 963 } 964 /* 965 * If the comparison at the end of a block is an equality 966 * comparison against a constant, and nobody uses the value 967 * we leave in the A register at the end of a block, and 968 * the operation preceding the comparison is an arithmetic 969 * operation, we can sometimes optimize it away. 970 */ 971 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && 972 !ATOMELEM(b->out_use, A_ATOM)) { 973 /* 974 * We can optimize away certain subtractions of the 975 * X register.
976 */ 977 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { 978 val = b->val[X_ATOM]; 979 if (opt_state->vmap[val].is_const) { 980 /* 981 * If we have a subtract to do a comparison, 982 * and the X register is a known constant, 983 * we can merge this value into the 984 * comparison: 985 * 986 * sub x -> nop 987 * jeq #y jeq #(x+y) 988 */ 989 b->s.k += opt_state->vmap[val].const_val; 990 last->s.code = NOP; 991 opt_state->done = 0; 992 } else if (b->s.k == 0) { 993 /* 994 * If the X register isn't a constant, 995 * and the comparison in the test is 996 * against 0, we can compare with the 997 * X register, instead: 998 * 999 * sub x -> nop 1000 * jeq #0 jeq x 1001 */ 1002 last->s.code = NOP; 1003 b->s.code = BPF_JMP|BPF_JEQ|BPF_X; 1004 opt_state->done = 0; 1005 } 1006 } 1007 /* 1008 * Likewise, a constant subtract can be simplified: 1009 * 1010 * sub #x -> nop 1011 * jeq #y -> jeq #(x+y) 1012 */ 1013 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { 1014 last->s.code = NOP; 1015 b->s.k += last->s.k; 1016 opt_state->done = 0; 1017 } 1018 /* 1019 * And, similarly, a constant AND can be simplified 1020 * if we're testing against 0, i.e.: 1021 * 1022 * and #k nop 1023 * jeq #0 -> jset #k 1024 */ 1025 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 1026 b->s.k == 0) { 1027 b->s.k = last->s.k; 1028 b->s.code = BPF_JMP|BPF_K|BPF_JSET; 1029 last->s.code = NOP; 1030 opt_state->done = 0; 1031 opt_not(b); 1032 } 1033 } 1034 /* 1035 * jset #0 -> never 1036 * jset #ffffffff -> always 1037 */ 1038 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 1039 if (b->s.k == 0) 1040 JT(b) = JF(b); 1041 if ((u_int)b->s.k == 0xffffffffU) 1042 JF(b) = JT(b); 1043 } 1044 /* 1045 * If we're comparing against the index register, and the index 1046 * register is a known constant, we can just compare against that 1047 * constant. 1048 */ 1049 val = b->val[X_ATOM]; 1050 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { 1051 bpf_int32 v = opt_state->vmap[val].const_val; 1052 b->s.code &= ~BPF_X; 1053 b->s.k = v; 1054 } 1055 /* 1056 * If the accumulator is a known constant, we can compute the 1057 * comparison result. 1058 */ 1059 val = b->val[A_ATOM]; 1060 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 1061 bpf_int32 v = opt_state->vmap[val].const_val; 1062 switch (BPF_OP(b->s.code)) { 1063 1064 case BPF_JEQ: 1065 v = v == b->s.k; 1066 break; 1067 1068 case BPF_JGT: 1069 v = (unsigned)v > (unsigned)b->s.k; 1070 break; 1071 1072 case BPF_JGE: 1073 v = (unsigned)v >= (unsigned)b->s.k; 1074 break; 1075 1076 case BPF_JSET: 1077 v &= b->s.k; 1078 break; 1079 1080 default: 1081 abort(); 1082 } 1083 if (JF(b) != JT(b)) 1084 opt_state->done = 0; 1085 if (v) 1086 JF(b) = JT(b); 1087 else 1088 JT(b) = JF(b); 1089 } 1090 } 1091 1092 /* 1093 * Compute the symbolic value of expression of 's', and update 1094 * anything it defines in the value table 'val'. If 'alter' is true, 1095 * do various optimizations. This code would be cleaner if symbolic 1096 * evaluation and code transformations weren't folded together. 
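 *
 * One illustrative case: with 'alter' set, an indexed load preceded by
 * a constant load of the index register, e.g.
 *
 *	ldx #10
 *	ld [x + 4]
 *
 * is rewritten by the BPF_LD|BPF_IND case below into the absolute
 * load "ld [14]", because the value table says the index register
 * holds a known constant.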
1097 */ 1098 static void 1099 opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter) 1100 { 1101 int op; 1102 int v; 1103 1104 switch (s->code) { 1105 1106 case BPF_LD|BPF_ABS|BPF_W: 1107 case BPF_LD|BPF_ABS|BPF_H: 1108 case BPF_LD|BPF_ABS|BPF_B: 1109 v = F(opt_state, s->code, s->k, 0L); 1110 vstore(s, &val[A_ATOM], v, alter); 1111 break; 1112 1113 case BPF_LD|BPF_IND|BPF_W: 1114 case BPF_LD|BPF_IND|BPF_H: 1115 case BPF_LD|BPF_IND|BPF_B: 1116 v = val[X_ATOM]; 1117 if (alter && opt_state->vmap[v].is_const) { 1118 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 1119 s->k += opt_state->vmap[v].const_val; 1120 v = F(opt_state, s->code, s->k, 0L); 1121 opt_state->done = 0; 1122 } 1123 else 1124 v = F(opt_state, s->code, s->k, v); 1125 vstore(s, &val[A_ATOM], v, alter); 1126 break; 1127 1128 case BPF_LD|BPF_LEN: 1129 v = F(opt_state, s->code, 0L, 0L); 1130 vstore(s, &val[A_ATOM], v, alter); 1131 break; 1132 1133 case BPF_LD|BPF_IMM: 1134 v = K(s->k); 1135 vstore(s, &val[A_ATOM], v, alter); 1136 break; 1137 1138 case BPF_LDX|BPF_IMM: 1139 v = K(s->k); 1140 vstore(s, &val[X_ATOM], v, alter); 1141 break; 1142 1143 case BPF_LDX|BPF_MSH|BPF_B: 1144 v = F(opt_state, s->code, s->k, 0L); 1145 vstore(s, &val[X_ATOM], v, alter); 1146 break; 1147 1148 case BPF_ALU|BPF_NEG: 1149 if (alter && opt_state->vmap[val[A_ATOM]].is_const) { 1150 s->code = BPF_LD|BPF_IMM; 1151 /* 1152 * Do this negation as unsigned arithmetic; that's 1153 * what modern BPF engines do, and it guarantees 1154 * that all possible values can be negated. (Yeah, 1155 * negating 0x80000000, the minimum signed 32-bit 1156 * two's-complement value, results in 0x80000000, 1157 * so it's still negative, but we *should* be doing 1158 * all unsigned arithmetic here, to match what 1159 * modern BPF engines do.) 1160 * 1161 * Express it as 0U - (unsigned value) so that we 1162 * don't get compiler warnings about negating an 1163 * unsigned value and don't get UBSan warnings 1164 * about the result of negating 0x80000000 being 1165 * undefined. 1166 */ 1167 s->k = 0U - (bpf_u_int32)(opt_state->vmap[val[A_ATOM]].const_val); 1168 val[A_ATOM] = K(s->k); 1169 } 1170 else 1171 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L); 1172 break; 1173 1174 case BPF_ALU|BPF_ADD|BPF_K: 1175 case BPF_ALU|BPF_SUB|BPF_K: 1176 case BPF_ALU|BPF_MUL|BPF_K: 1177 case BPF_ALU|BPF_DIV|BPF_K: 1178 case BPF_ALU|BPF_MOD|BPF_K: 1179 case BPF_ALU|BPF_AND|BPF_K: 1180 case BPF_ALU|BPF_OR|BPF_K: 1181 case BPF_ALU|BPF_XOR|BPF_K: 1182 case BPF_ALU|BPF_LSH|BPF_K: 1183 case BPF_ALU|BPF_RSH|BPF_K: 1184 op = BPF_OP(s->code); 1185 if (alter) { 1186 if (s->k == 0) { 1187 /* 1188 * Optimize operations where the constant 1189 * is zero. 1190 * 1191 * Don't optimize away "sub #0" 1192 * as it may be needed later to 1193 * fixup the generated math code. 1194 * 1195 * Fail if we're dividing by zero or taking 1196 * a modulus by zero. 
1197 */ 1198 if (op == BPF_ADD || 1199 op == BPF_LSH || op == BPF_RSH || 1200 op == BPF_OR || op == BPF_XOR) { 1201 s->code = NOP; 1202 break; 1203 } 1204 if (op == BPF_MUL || op == BPF_AND) { 1205 s->code = BPF_LD|BPF_IMM; 1206 val[A_ATOM] = K(s->k); 1207 break; 1208 } 1209 if (op == BPF_DIV) 1210 opt_error(opt_state, 1211 "division by zero"); 1212 if (op == BPF_MOD) 1213 opt_error(opt_state, 1214 "modulus by zero"); 1215 } 1216 if (opt_state->vmap[val[A_ATOM]].is_const) { 1217 fold_op(opt_state, s, val[A_ATOM], K(s->k)); 1218 val[A_ATOM] = K(s->k); 1219 break; 1220 } 1221 } 1222 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k)); 1223 break; 1224 1225 case BPF_ALU|BPF_ADD|BPF_X: 1226 case BPF_ALU|BPF_SUB|BPF_X: 1227 case BPF_ALU|BPF_MUL|BPF_X: 1228 case BPF_ALU|BPF_DIV|BPF_X: 1229 case BPF_ALU|BPF_MOD|BPF_X: 1230 case BPF_ALU|BPF_AND|BPF_X: 1231 case BPF_ALU|BPF_OR|BPF_X: 1232 case BPF_ALU|BPF_XOR|BPF_X: 1233 case BPF_ALU|BPF_LSH|BPF_X: 1234 case BPF_ALU|BPF_RSH|BPF_X: 1235 op = BPF_OP(s->code); 1236 if (alter && opt_state->vmap[val[X_ATOM]].is_const) { 1237 if (opt_state->vmap[val[A_ATOM]].is_const) { 1238 fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]); 1239 val[A_ATOM] = K(s->k); 1240 } 1241 else { 1242 s->code = BPF_ALU|BPF_K|op; 1243 s->k = opt_state->vmap[val[X_ATOM]].const_val; 1244 /* 1245 * XXX - we need to make up our minds 1246 * as to what integers are signed and 1247 * what integers are unsigned in BPF 1248 * programs and in our IR. 1249 */ 1250 if ((op == BPF_LSH || op == BPF_RSH) && 1251 (s->k < 0 || s->k > 31)) 1252 opt_error(opt_state, 1253 "shift by more than 31 bits"); 1254 opt_state->done = 0; 1255 val[A_ATOM] = 1256 F(opt_state, s->code, val[A_ATOM], K(s->k)); 1257 } 1258 break; 1259 } 1260 /* 1261 * Check if we're doing something to an accumulator 1262 * that is 0, and simplify. This may not seem like 1263 * much of a simplification but it could open up further 1264 * optimizations. 1265 * XXX We could also check for mul by 1, etc. 
1266 */ 1267 if (alter && opt_state->vmap[val[A_ATOM]].is_const 1268 && opt_state->vmap[val[A_ATOM]].const_val == 0) { 1269 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) { 1270 s->code = BPF_MISC|BPF_TXA; 1271 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1272 break; 1273 } 1274 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD || 1275 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { 1276 s->code = BPF_LD|BPF_IMM; 1277 s->k = 0; 1278 vstore(s, &val[A_ATOM], K(s->k), alter); 1279 break; 1280 } 1281 else if (op == BPF_NEG) { 1282 s->code = NOP; 1283 break; 1284 } 1285 } 1286 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]); 1287 break; 1288 1289 case BPF_MISC|BPF_TXA: 1290 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1291 break; 1292 1293 case BPF_LD|BPF_MEM: 1294 v = val[s->k]; 1295 if (alter && opt_state->vmap[v].is_const) { 1296 s->code = BPF_LD|BPF_IMM; 1297 s->k = opt_state->vmap[v].const_val; 1298 opt_state->done = 0; 1299 } 1300 vstore(s, &val[A_ATOM], v, alter); 1301 break; 1302 1303 case BPF_MISC|BPF_TAX: 1304 vstore(s, &val[X_ATOM], val[A_ATOM], alter); 1305 break; 1306 1307 case BPF_LDX|BPF_MEM: 1308 v = val[s->k]; 1309 if (alter && opt_state->vmap[v].is_const) { 1310 s->code = BPF_LDX|BPF_IMM; 1311 s->k = opt_state->vmap[v].const_val; 1312 opt_state->done = 0; 1313 } 1314 vstore(s, &val[X_ATOM], v, alter); 1315 break; 1316 1317 case BPF_ST: 1318 vstore(s, &val[s->k], val[A_ATOM], alter); 1319 break; 1320 1321 case BPF_STX: 1322 vstore(s, &val[s->k], val[X_ATOM], alter); 1323 break; 1324 } 1325 } 1326 1327 static void 1328 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[]) 1329 { 1330 register int atom; 1331 1332 atom = atomuse(s); 1333 if (atom >= 0) { 1334 if (atom == AX_ATOM) { 1335 last[X_ATOM] = 0; 1336 last[A_ATOM] = 0; 1337 } 1338 else 1339 last[atom] = 0; 1340 } 1341 atom = atomdef(s); 1342 if (atom >= 0) { 1343 if (last[atom]) { 1344 opt_state->done = 0; 1345 last[atom]->code = NOP; 1346 } 1347 last[atom] = s; 1348 } 1349 } 1350 1351 static void 1352 opt_deadstores(opt_state_t *opt_state, register struct block *b) 1353 { 1354 register struct slist *s; 1355 register int atom; 1356 struct stmt *last[N_ATOMS]; 1357 1358 memset((char *)last, 0, sizeof last); 1359 1360 for (s = b->stmts; s != 0; s = s->next) 1361 deadstmt(opt_state, &s->s, last); 1362 deadstmt(opt_state, &b->s, last); 1363 1364 for (atom = 0; atom < N_ATOMS; ++atom) 1365 if (last[atom] && !ATOMELEM(b->out_use, atom)) { 1366 last[atom]->code = NOP; 1367 opt_state->done = 0; 1368 } 1369 } 1370 1371 static void 1372 opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts) 1373 { 1374 struct slist *s; 1375 struct edge *p; 1376 int i; 1377 bpf_int32 aval, xval; 1378 1379 #if 0 1380 for (s = b->stmts; s && s->next; s = s->next) 1381 if (BPF_CLASS(s->s.code) == BPF_JMP) { 1382 do_stmts = 0; 1383 break; 1384 } 1385 #endif 1386 1387 /* 1388 * Initialize the atom values. 1389 */ 1390 p = b->in_edges; 1391 if (p == 0) { 1392 /* 1393 * We have no predecessors, so everything is undefined 1394 * upon entry to this block. 1395 */ 1396 memset((char *)b->val, 0, sizeof(b->val)); 1397 } else { 1398 /* 1399 * Inherit values from our predecessors. 1400 * 1401 * First, get the values from the predecessor along the 1402 * first edge leading to this node. 1403 */ 1404 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 1405 /* 1406 * Now look at all the other nodes leading to this node. 
1407 * If, for the predecessor along that edge, a register 1408 * has a different value from the one we have (i.e., 1409 * control paths are merging, and the merging paths 1410 * assign different values to that register), give the 1411 * register the undefined value of 0. 1412 */ 1413 while ((p = p->next) != NULL) { 1414 for (i = 0; i < N_ATOMS; ++i) 1415 if (b->val[i] != p->pred->val[i]) 1416 b->val[i] = 0; 1417 } 1418 } 1419 aval = b->val[A_ATOM]; 1420 xval = b->val[X_ATOM]; 1421 for (s = b->stmts; s; s = s->next) 1422 opt_stmt(opt_state, &s->s, b->val, do_stmts); 1423 1424 /* 1425 * This is a special case: if we don't use anything from this 1426 * block, and we load the accumulator or index register with a 1427 * value that is already there, or if this block is a return, 1428 * eliminate all the statements. 1429 * 1430 * XXX - what if it does a store? 1431 * 1432 * XXX - why does it matter whether we use anything from this 1433 * block? If the accumulator or index register doesn't change 1434 * its value, isn't that OK even if we use that value? 1435 * 1436 * XXX - if we load the accumulator with a different value, 1437 * and the block ends with a conditional branch, we obviously 1438 * can't eliminate it, as the branch depends on that value. 1439 * For the index register, the conditional branch only depends 1440 * on the index register value if the test is against the index 1441 * register value rather than a constant; if nothing uses the 1442 * value we put into the index register, and we're not testing 1443 * against the index register's value, and there aren't any 1444 * other problems that would keep us from eliminating this 1445 * block, can we eliminate it? 1446 */ 1447 if (do_stmts && 1448 ((b->out_use == 0 && 1449 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval && 1450 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) || 1451 BPF_CLASS(b->s.code) == BPF_RET)) { 1452 if (b->stmts != 0) { 1453 b->stmts = 0; 1454 opt_state->done = 0; 1455 } 1456 } else { 1457 opt_peep(opt_state, b); 1458 opt_deadstores(opt_state, b); 1459 } 1460 /* 1461 * Set up values for branch optimizer. 1462 */ 1463 if (BPF_SRC(b->s.code) == BPF_K) 1464 b->oval = K(b->s.k); 1465 else 1466 b->oval = b->val[X_ATOM]; 1467 b->et.code = b->s.code; 1468 b->ef.code = -b->s.code; 1469 } 1470 1471 /* 1472 * Return true if any register that is used on exit from 'succ', has 1473 * an exit value that is different from the corresponding exit value 1474 * from 'b'. 1475 */ 1476 static int 1477 use_conflict(struct block *b, struct block *succ) 1478 { 1479 int atom; 1480 atomset use = succ->out_use; 1481 1482 if (use == 0) 1483 return 0; 1484 1485 for (atom = 0; atom < N_ATOMS; ++atom) 1486 if (ATOMELEM(use, atom)) 1487 if (b->val[atom] != succ->val[atom]) 1488 return 1; 1489 return 0; 1490 } 1491 1492 static struct block * 1493 fold_edge(struct block *child, struct edge *ep) 1494 { 1495 int sense; 1496 int aval0, aval1, oval0, oval1; 1497 int code = ep->code; 1498 1499 if (code < 0) { 1500 code = -code; 1501 sense = 0; 1502 } else 1503 sense = 1; 1504 1505 if (child->s.code != code) 1506 return 0; 1507 1508 aval0 = child->val[A_ATOM]; 1509 oval0 = child->oval; 1510 aval1 = ep->pred->val[A_ATOM]; 1511 oval1 = ep->pred->oval; 1512 1513 if (aval0 != aval1) 1514 return 0; 1515 1516 if (oval0 == oval1) 1517 /* 1518 * The operands of the branch instructions are 1519 * identical, so the result is true if a true 1520 * branch was taken to get here, otherwise false. 1521 */ 1522 return sense ? 
JT(child) : JF(child); 1523 1524 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K)) 1525 /* 1526 * At this point, we only know the comparison if we 1527 * came down the true branch, and it was an equality 1528 * comparison with a constant. 1529 * 1530 * I.e., if we came down the true branch, and the branch 1531 * was an equality comparison with a constant, we know the 1532 * accumulator contains that constant. If we came down 1533 * the false branch, or the comparison wasn't with a 1534 * constant, we don't know what was in the accumulator. 1535 * 1536 * We rely on the fact that distinct constants have distinct 1537 * value numbers. 1538 */ 1539 return JF(child); 1540 1541 return 0; 1542 } 1543 1544 static void 1545 opt_j(opt_state_t *opt_state, struct edge *ep) 1546 { 1547 register int i, k; 1548 register struct block *target; 1549 1550 if (JT(ep->succ) == 0) 1551 return; 1552 1553 if (JT(ep->succ) == JF(ep->succ)) { 1554 /* 1555 * Common branch targets can be eliminated, provided 1556 * there is no data dependency. 1557 */ 1558 if (!use_conflict(ep->pred, ep->succ->et.succ)) { 1559 opt_state->done = 0; 1560 ep->succ = JT(ep->succ); 1561 } 1562 } 1563 /* 1564 * For each edge dominator that matches the successor of this 1565 * edge, promote the edge successor to its grandchild. 1566 * 1567 * XXX We violate the set abstraction here in favor of a reasonably 1568 * efficient loop. 1569 */ 1570 top: 1571 for (i = 0; i < opt_state->edgewords; ++i) { 1572 register bpf_u_int32 x = ep->edom[i]; 1573 1574 while (x != 0) { 1575 k = lowest_set_bit(x); 1576 x &=~ ((bpf_u_int32)1 << k); 1577 k += i * BITS_PER_WORD; 1578 1579 target = fold_edge(ep->succ, opt_state->edges[k]); 1580 /* 1581 * Check that there is no data dependency between 1582 * nodes that will be violated if we move the edge. 1583 */ 1584 if (target != 0 && !use_conflict(ep->pred, target)) { 1585 opt_state->done = 0; 1586 ep->succ = target; 1587 if (JT(target) != 0) 1588 /* 1589 * Start over unless we hit a leaf. 1590 */ 1591 goto top; 1592 return; 1593 } 1594 } 1595 } 1596 } 1597 1598 1599 static void 1600 or_pullup(opt_state_t *opt_state, struct block *b) 1601 { 1602 int val, at_top; 1603 struct block *pull; 1604 struct block **diffp, **samep; 1605 struct edge *ep; 1606 1607 ep = b->in_edges; 1608 if (ep == 0) 1609 return; 1610 1611 /* 1612 * Make sure each predecessor loads the same value. 1613 * XXX why? 1614 */ 1615 val = ep->pred->val[A_ATOM]; 1616 for (ep = ep->next; ep != 0; ep = ep->next) 1617 if (val != ep->pred->val[A_ATOM]) 1618 return; 1619 1620 if (JT(b->in_edges->pred) == b) 1621 diffp = &JT(b->in_edges->pred); 1622 else 1623 diffp = &JF(b->in_edges->pred); 1624 1625 at_top = 1; 1626 for (;;) { 1627 if (*diffp == 0) 1628 return; 1629 1630 if (JT(*diffp) != JT(b)) 1631 return; 1632 1633 if (!SET_MEMBER((*diffp)->dom, b->id)) 1634 return; 1635 1636 if ((*diffp)->val[A_ATOM] != val) 1637 break; 1638 1639 diffp = &JF(*diffp); 1640 at_top = 0; 1641 } 1642 samep = &JF(*diffp); 1643 for (;;) { 1644 if (*samep == 0) 1645 return; 1646 1647 if (JT(*samep) != JT(b)) 1648 return; 1649 1650 if (!SET_MEMBER((*samep)->dom, b->id)) 1651 return; 1652 1653 if ((*samep)->val[A_ATOM] == val) 1654 break; 1655 1656 /* XXX Need to check that there are no data dependencies 1657 between dp0 and dp1. Currently, the code generator 1658 will not produce such dependencies. */ 1659 samep = &JF(*samep); 1660 } 1661 #ifdef notdef 1662 /* XXX This doesn't cover everything.
*/ 1663 for (i = 0; i < N_ATOMS; ++i) 1664 if ((*samep)->val[i] != pred->val[i]) 1665 return; 1666 #endif 1667 /* Pull up the node. */ 1668 pull = *samep; 1669 *samep = JF(pull); 1670 JF(pull) = *diffp; 1671 1672 /* 1673 * At the top of the chain, each predecessor needs to point at the 1674 * pulled up node. Inside the chain, there is only one predecessor 1675 * to worry about. 1676 */ 1677 if (at_top) { 1678 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1679 if (JT(ep->pred) == b) 1680 JT(ep->pred) = pull; 1681 else 1682 JF(ep->pred) = pull; 1683 } 1684 } 1685 else 1686 *diffp = pull; 1687 1688 opt_state->done = 0; 1689 } 1690 1691 static void 1692 and_pullup(opt_state_t *opt_state, struct block *b) 1693 { 1694 int val, at_top; 1695 struct block *pull; 1696 struct block **diffp, **samep; 1697 struct edge *ep; 1698 1699 ep = b->in_edges; 1700 if (ep == 0) 1701 return; 1702 1703 /* 1704 * Make sure each predecessor loads the same value. 1705 */ 1706 val = ep->pred->val[A_ATOM]; 1707 for (ep = ep->next; ep != 0; ep = ep->next) 1708 if (val != ep->pred->val[A_ATOM]) 1709 return; 1710 1711 if (JT(b->in_edges->pred) == b) 1712 diffp = &JT(b->in_edges->pred); 1713 else 1714 diffp = &JF(b->in_edges->pred); 1715 1716 at_top = 1; 1717 for (;;) { 1718 if (*diffp == 0) 1719 return; 1720 1721 if (JF(*diffp) != JF(b)) 1722 return; 1723 1724 if (!SET_MEMBER((*diffp)->dom, b->id)) 1725 return; 1726 1727 if ((*diffp)->val[A_ATOM] != val) 1728 break; 1729 1730 diffp = &JT(*diffp); 1731 at_top = 0; 1732 } 1733 samep = &JT(*diffp); 1734 for (;;) { 1735 if (*samep == 0) 1736 return; 1737 1738 if (JF(*samep) != JF(b)) 1739 return; 1740 1741 if (!SET_MEMBER((*samep)->dom, b->id)) 1742 return; 1743 1744 if ((*samep)->val[A_ATOM] == val) 1745 break; 1746 1747 /* XXX Need to check that there are no data dependencies 1748 between diffp and samep. Currently, the code generator 1749 will not produce such dependencies. */ 1750 samep = &JT(*samep); 1751 } 1752 #ifdef notdef 1753 /* XXX This doesn't cover everything. */ 1754 for (i = 0; i < N_ATOMS; ++i) 1755 if ((*samep)->val[i] != pred->val[i]) 1756 return; 1757 #endif 1758 /* Pull up the node. */ 1759 pull = *samep; 1760 *samep = JT(pull); 1761 JT(pull) = *diffp; 1762 1763 /* 1764 * At the top of the chain, each predecessor needs to point at the 1765 * pulled up node. Inside the chain, there is only one predecessor 1766 * to worry about. 1767 */ 1768 if (at_top) { 1769 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1770 if (JT(ep->pred) == b) 1771 JT(ep->pred) = pull; 1772 else 1773 JF(ep->pred) = pull; 1774 } 1775 } 1776 else 1777 *diffp = pull; 1778 1779 opt_state->done = 0; 1780 } 1781 1782 static void 1783 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts) 1784 { 1785 int i, maxlevel; 1786 struct block *p; 1787 1788 init_val(opt_state); 1789 maxlevel = ic->root->level; 1790 1791 find_inedges(opt_state, ic->root); 1792 for (i = maxlevel; i >= 0; --i) 1793 for (p = opt_state->levels[i]; p; p = p->link) 1794 opt_blk(opt_state, p, do_stmts); 1795 1796 if (do_stmts) 1797 /* 1798 * No point trying to move branches; it can't possibly 1799 * make a difference at this point. 
1800 */ 1801 return; 1802 1803 for (i = 1; i <= maxlevel; ++i) { 1804 for (p = opt_state->levels[i]; p; p = p->link) { 1805 opt_j(opt_state, &p->et); 1806 opt_j(opt_state, &p->ef); 1807 } 1808 } 1809 1810 find_inedges(opt_state, ic->root); 1811 for (i = 1; i <= maxlevel; ++i) { 1812 for (p = opt_state->levels[i]; p; p = p->link) { 1813 or_pullup(opt_state, p); 1814 and_pullup(opt_state, p); 1815 } 1816 } 1817 } 1818 1819 static inline void 1820 link_inedge(struct edge *parent, struct block *child) 1821 { 1822 parent->next = child->in_edges; 1823 child->in_edges = parent; 1824 } 1825 1826 static void 1827 find_inedges(opt_state_t *opt_state, struct block *root) 1828 { 1829 int i; 1830 struct block *b; 1831 1832 for (i = 0; i < opt_state->n_blocks; ++i) 1833 opt_state->blocks[i]->in_edges = 0; 1834 1835 /* 1836 * Traverse the graph, adding each edge to the predecessor 1837 * list of its successors. Skip the leaves (i.e. level 0). 1838 */ 1839 for (i = root->level; i > 0; --i) { 1840 for (b = opt_state->levels[i]; b != 0; b = b->link) { 1841 link_inedge(&b->et, JT(b)); 1842 link_inedge(&b->ef, JF(b)); 1843 } 1844 } 1845 } 1846 1847 static void 1848 opt_root(struct block **b) 1849 { 1850 struct slist *tmp, *s; 1851 1852 s = (*b)->stmts; 1853 (*b)->stmts = 0; 1854 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b)) 1855 *b = JT(*b); 1856 1857 tmp = (*b)->stmts; 1858 if (tmp != 0) 1859 sappend(s, tmp); 1860 (*b)->stmts = s; 1861 1862 /* 1863 * If the root node is a return, then there is no 1864 * point executing any statements (since the bpf machine 1865 * has no side effects). 1866 */ 1867 if (BPF_CLASS((*b)->s.code) == BPF_RET) 1868 (*b)->stmts = 0; 1869 } 1870 1871 static void 1872 opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts) 1873 { 1874 1875 #ifdef BDEBUG 1876 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { 1877 printf("opt_loop(root, %d) begin\n", do_stmts); 1878 opt_dump(opt_state, ic); 1879 } 1880 #endif 1881 do { 1882 opt_state->done = 1; 1883 find_levels(opt_state, ic); 1884 find_dom(opt_state, ic->root); 1885 find_closure(opt_state, ic->root); 1886 find_ud(opt_state, ic->root); 1887 find_edom(opt_state, ic->root); 1888 opt_blks(opt_state, ic, do_stmts); 1889 #ifdef BDEBUG 1890 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { 1891 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done); 1892 opt_dump(opt_state, ic); 1893 } 1894 #endif 1895 } while (!opt_state->done); 1896 } 1897 1898 /* 1899 * Optimize the filter code in its dag representation. 1900 * Return 0 on success, -1 on error. 
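 *
 * A sketch of the expected call, assuming a caller such as the filter
 * compiler: the error buffer should be at least PCAP_ERRBUF_SIZE
 * bytes, as that is what opt_error() formats into.
 *
 *	char errbuf[PCAP_ERRBUF_SIZE];
 *
 *	if (bpf_optimize(&ic, errbuf) == -1) {
 *		(report errbuf to the user and give up)
 *	}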
1901 */ 1902 int 1903 bpf_optimize(struct icode *ic, char *errbuf) 1904 { 1905 opt_state_t opt_state; 1906 1907 memset(&opt_state, 0, sizeof(opt_state)); 1908 opt_state.errbuf = errbuf; 1909 if (setjmp(opt_state.top_ctx)) { 1910 opt_cleanup(&opt_state); 1911 return -1; 1912 } 1913 opt_init(&opt_state, ic); 1914 opt_loop(&opt_state, ic, 0); 1915 opt_loop(&opt_state, ic, 1); 1916 intern_blocks(&opt_state, ic); 1917 #ifdef BDEBUG 1918 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { 1919 printf("after intern_blocks()\n"); 1920 opt_dump(&opt_state, ic); 1921 } 1922 #endif 1923 opt_root(&ic->root); 1924 #ifdef BDEBUG 1925 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { 1926 printf("after opt_root()\n"); 1927 opt_dump(&opt_state, ic); 1928 } 1929 #endif 1930 opt_cleanup(&opt_state); 1931 return 0; 1932 } 1933 1934 static void 1935 make_marks(struct icode *ic, struct block *p) 1936 { 1937 if (!isMarked(ic, p)) { 1938 Mark(ic, p); 1939 if (BPF_CLASS(p->s.code) != BPF_RET) { 1940 make_marks(ic, JT(p)); 1941 make_marks(ic, JF(p)); 1942 } 1943 } 1944 } 1945 1946 /* 1947 * Mark code array such that isMarked(ic->cur_mark, i) is true 1948 * only for nodes that are alive. 1949 */ 1950 static void 1951 mark_code(struct icode *ic) 1952 { 1953 ic->cur_mark += 1; 1954 make_marks(ic, ic->root); 1955 } 1956 1957 /* 1958 * True iff the two stmt lists load the same value from the packet into 1959 * the accumulator. 1960 */ 1961 static int 1962 eq_slist(struct slist *x, struct slist *y) 1963 { 1964 for (;;) { 1965 while (x && x->s.code == NOP) 1966 x = x->next; 1967 while (y && y->s.code == NOP) 1968 y = y->next; 1969 if (x == 0) 1970 return y == 0; 1971 if (y == 0) 1972 return x == 0; 1973 if (x->s.code != y->s.code || x->s.k != y->s.k) 1974 return 0; 1975 x = x->next; 1976 y = y->next; 1977 } 1978 } 1979 1980 static inline int 1981 eq_blk(struct block *b0, struct block *b1) 1982 { 1983 if (b0->s.code == b1->s.code && 1984 b0->s.k == b1->s.k && 1985 b0->et.succ == b1->et.succ && 1986 b0->ef.succ == b1->ef.succ) 1987 return eq_slist(b0->stmts, b1->stmts); 1988 return 0; 1989 } 1990 1991 static void 1992 intern_blocks(opt_state_t *opt_state, struct icode *ic) 1993 { 1994 struct block *p; 1995 int i, j; 1996 int done1; /* don't shadow global */ 1997 top: 1998 done1 = 1; 1999 for (i = 0; i < opt_state->n_blocks; ++i) 2000 opt_state->blocks[i]->link = 0; 2001 2002 mark_code(ic); 2003 2004 for (i = opt_state->n_blocks - 1; --i >= 0; ) { 2005 if (!isMarked(ic, opt_state->blocks[i])) 2006 continue; 2007 for (j = i + 1; j < opt_state->n_blocks; ++j) { 2008 if (!isMarked(ic, opt_state->blocks[j])) 2009 continue; 2010 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) { 2011 opt_state->blocks[i]->link = opt_state->blocks[j]->link ? 2012 opt_state->blocks[j]->link : opt_state->blocks[j]; 2013 break; 2014 } 2015 } 2016 } 2017 for (i = 0; i < opt_state->n_blocks; ++i) { 2018 p = opt_state->blocks[i]; 2019 if (JT(p) == 0) 2020 continue; 2021 if (JT(p)->link) { 2022 done1 = 0; 2023 JT(p) = JT(p)->link; 2024 } 2025 if (JF(p)->link) { 2026 done1 = 0; 2027 JF(p) = JF(p)->link; 2028 } 2029 } 2030 if (!done1) 2031 goto top; 2032 } 2033 2034 static void 2035 opt_cleanup(opt_state_t *opt_state) 2036 { 2037 free((void *)opt_state->vnode_base); 2038 free((void *)opt_state->vmap); 2039 free((void *)opt_state->edges); 2040 free((void *)opt_state->space); 2041 free((void *)opt_state->levels); 2042 free((void *)opt_state->blocks); 2043 } 2044 2045 /* 2046 * For optimizer errors. 
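 *
 * For example, the "division by zero" check in fold_op() above lands
 * here; the message is formatted into opt_state->errbuf (if one was
 * supplied) and control longjmps back to the setjmp() in
 * bpf_optimize(), which cleans up and returns -1.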
2047 */ 2048 static void PCAP_NORETURN 2049 opt_error(opt_state_t *opt_state, const char *fmt, ...) 2050 { 2051 va_list ap; 2052 2053 if (opt_state->errbuf != NULL) { 2054 va_start(ap, fmt); 2055 (void)pcap_vsnprintf(opt_state->errbuf, 2056 PCAP_ERRBUF_SIZE, fmt, ap); 2057 va_end(ap); 2058 } 2059 longjmp(opt_state->top_ctx, 1); 2060 /* NOTREACHED */ 2061 } 2062 2063 /* 2064 * Return the number of stmts in 's'. 2065 */ 2066 static u_int 2067 slength(struct slist *s) 2068 { 2069 u_int n = 0; 2070 2071 for (; s; s = s->next) 2072 if (s->s.code != NOP) 2073 ++n; 2074 return n; 2075 } 2076 2077 /* 2078 * Return the number of nodes reachable by 'p'. 2079 * All nodes should be initially unmarked. 2080 */ 2081 static int 2082 count_blocks(struct icode *ic, struct block *p) 2083 { 2084 if (p == 0 || isMarked(ic, p)) 2085 return 0; 2086 Mark(ic, p); 2087 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1; 2088 } 2089 2090 /* 2091 * Do a depth first search on the flow graph, numbering 2092 * the basic blocks, and entering them into the 'blocks' array. 2093 */ 2094 static void 2095 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p) 2096 { 2097 int n; 2098 2099 if (p == 0 || isMarked(ic, p)) 2100 return; 2101 2102 Mark(ic, p); 2103 n = opt_state->n_blocks++; 2104 p->id = n; 2105 opt_state->blocks[n] = p; 2106 2107 number_blks_r(opt_state, ic, JT(p)); 2108 number_blks_r(opt_state, ic, JF(p)); 2109 } 2110 2111 /* 2112 * Return the number of stmts in the flowgraph reachable by 'p'. 2113 * The nodes should be unmarked before calling. 2114 * 2115 * Note that "stmts" means "instructions", and that this includes 2116 * 2117 * side-effect statements in 'p' (slength(p->stmts)); 2118 * 2119 * statements in the true branch from 'p' (count_stmts(JT(p))); 2120 * 2121 * statements in the false branch from 'p' (count_stmts(JF(p))); 2122 * 2123 * the conditional jump itself (1); 2124 * 2125 * an extra long jump if the true branch requires it (p->longjt); 2126 * 2127 * an extra long jump if the false branch requires it (p->longjf). 2128 */ 2129 static u_int 2130 count_stmts(struct icode *ic, struct block *p) 2131 { 2132 u_int n; 2133 2134 if (p == 0 || isMarked(ic, p)) 2135 return 0; 2136 Mark(ic, p); 2137 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p)); 2138 return slength(p->stmts) + n + 1 + p->longjt + p->longjf; 2139 } 2140 2141 /* 2142 * Allocate memory. All allocation is done before optimization 2143 * is begun. A linear bound on the size of all data structures is computed 2144 * from the total number of blocks and/or statements. 2145 */ 2146 static void 2147 opt_init(opt_state_t *opt_state, struct icode *ic) 2148 { 2149 bpf_u_int32 *p; 2150 int i, n, max_stmts; 2151 2152 /* 2153 * First, count the blocks, so we can malloc an array to map 2154 * block number to block. Then, put the blocks into the array. 2155 */ 2156 unMarkAll(ic); 2157 n = count_blocks(ic, ic->root); 2158 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks)); 2159 if (opt_state->blocks == NULL) 2160 opt_error(opt_state, "malloc"); 2161 unMarkAll(ic); 2162 opt_state->n_blocks = 0; 2163 number_blks_r(opt_state, ic, ic->root); 2164 2165 opt_state->n_edges = 2 * opt_state->n_blocks; 2166 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges)); 2167 if (opt_state->edges == NULL) { 2168 opt_error(opt_state, "malloc"); 2169 } 2170 2171 /* 2172 * The number of levels is bounded by the number of nodes.
2173 */ 2174 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels)); 2175 if (opt_state->levels == NULL) { 2176 opt_error(opt_state, "malloc"); 2177 } 2178 2179 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1; 2180 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1; 2181 2182 /* XXX */ 2183 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space) 2184 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space)); 2185 if (opt_state->space == NULL) { 2186 opt_error(opt_state, "malloc"); 2187 } 2188 p = opt_state->space; 2189 opt_state->all_dom_sets = p; 2190 for (i = 0; i < n; ++i) { 2191 opt_state->blocks[i]->dom = p; 2192 p += opt_state->nodewords; 2193 } 2194 opt_state->all_closure_sets = p; 2195 for (i = 0; i < n; ++i) { 2196 opt_state->blocks[i]->closure = p; 2197 p += opt_state->nodewords; 2198 } 2199 opt_state->all_edge_sets = p; 2200 for (i = 0; i < n; ++i) { 2201 register struct block *b = opt_state->blocks[i]; 2202 2203 b->et.edom = p; 2204 p += opt_state->edgewords; 2205 b->ef.edom = p; 2206 p += opt_state->edgewords; 2207 b->et.id = i; 2208 opt_state->edges[i] = &b->et; 2209 b->ef.id = opt_state->n_blocks + i; 2210 opt_state->edges[opt_state->n_blocks + i] = &b->ef; 2211 b->et.pred = b; 2212 b->ef.pred = b; 2213 } 2214 max_stmts = 0; 2215 for (i = 0; i < n; ++i) 2216 max_stmts += slength(opt_state->blocks[i]->stmts) + 1; 2217 /* 2218 * We allocate at most 3 value numbers per statement, 2219 * so this is an upper bound on the number of valnodes 2220 * we'll need. 2221 */ 2222 opt_state->maxval = 3 * max_stmts; 2223 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap)); 2224 if (opt_state->vmap == NULL) { 2225 opt_error(opt_state, "malloc"); 2226 } 2227 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base)); 2228 if (opt_state->vnode_base == NULL) { 2229 opt_error(opt_state, "malloc"); 2230 } 2231 } 2232 2233 /* 2234 * This is only used when supporting optimizer debugging. It is 2235 * global state, so do *not* do more than one compile in parallel 2236 * and expect it to provide meaningful information. 2237 */ 2238 #ifdef BDEBUG 2239 int bids[NBIDS]; 2240 #endif 2241 2242 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...) 2243 PCAP_PRINTFLIKE(2, 3); 2244 2245 /* 2246 * Returns true if successful. Returns false if a branch has 2247 * an offset that is too large. If so, we have marked that 2248 * branch so that on a subsequent iteration, it will be treated 2249 * properly. 
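 *
 * Roughly: if a conditional branch's target does not fit in the
 * one-byte jt/jf field (i.e. it is 256 or more instructions away),
 * the block's longjt or longjf counter is bumped and 0 is returned,
 * so that the conversion can be redone with room reserved for an
 * extra unconditional long jump.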
 */
static int
convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	u_int slen;
	u_int off;
	u_int extrajmps;	/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))
		return (1);
	Mark(ic, p);

	if (convert_code_r(conv_state, ic, JF(p)) == 0)
		return (0);
	if (convert_code_r(conv_state, ic, JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			conv_error(conv_state, "not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				free(offset);
				conv_error(conv_state, "illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

		{
			u_int i;
			int jt, jf;
			const char ljerr[] = "%s for block-local relative jump: off=%d";

#if 0
			printf("code=%x off=%d %x %x\n", src->s.code,
			    off, src->s.jt, src->s.jf);
#endif

			if (!src->s.jt || !src->s.jf) {
				free(offset);
				conv_error(conv_state, ljerr, "no jmp destination", off);
				/*NOTREACHED*/
			}

			jt = jf = 0;
			for (i = 0; i < slen; i++) {
				if (offset[i] == src->s.jt) {
					if (jt) {
						free(offset);
						conv_error(conv_state, ljerr, "multiple matches", off);
						/*NOTREACHED*/
					}

					if (i - off - 1 >= 256) {
						free(offset);
						conv_error(conv_state, ljerr, "out-of-range jump", off);
						/*NOTREACHED*/
					}
					dst->jt = (u_char)(i - off - 1);
					jt++;
				}
				if (offset[i] == src->s.jf) {
					if (jf) {
						free(offset);
						conv_error(conv_state, ljerr, "multiple matches", off);
						/*NOTREACHED*/
					}
					if (i - off - 1 >= 256) {
						free(offset);
						conv_error(conv_state, ljerr, "out-of-range jump", off);
						/*NOTREACHED*/
					}
					dst->jf = (u_char)(i - off - 1);
					jf++;
				}
			}
			if (!jt || !jf) {
				free(offset);
				conv_error(conv_state, ljerr, "no destination found", off);
				/*NOTREACHED*/
			}
		}
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	if (dst - conv_state->fstart < NBIDS)
		bids[dst - conv_state->fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjt == 0) {
				/* mark this instruction and retry */
				p->longjt++;
				return (0);
			}
			/* branch if T to following jump */
			if (extrajmps >= 256) {
				conv_error(conv_state, "too many extra jumps");
				/*NOTREACHED*/
			}
			dst->jt = (u_char)extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		} else
			dst->jt = (u_char)off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjf == 0) {
				/* mark this instruction and retry */
				p->longjf++;
				return (0);
			}
			/* branch if F to following jump */
			/* if two jumps are inserted, F goes to second one */
			if (extrajmps >= 256) {
				conv_error(conv_state, "too many extra jumps");
				/*NOTREACHED*/
			}
			dst->jf = (u_char)extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		} else
			dst->jf = (u_char)off;
	}
	return (1);
}

/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
struct bpf_insn *
icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
    char *errbuf)
{
	u_int n;
	struct bpf_insn *fp;
	conv_state_t conv_state;

	conv_state.fstart = NULL;
	conv_state.errbuf = errbuf;
	if (setjmp(conv_state.top_ctx) != 0) {
		free(conv_state.fstart);
		return NULL;
	}

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	for (;;) {
		unMarkAll(ic);
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL) {
			(void)pcap_snprintf(errbuf, PCAP_ERRBUF_SIZE,
			    "malloc");
			free(fp);
			return NULL;
		}
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		unMarkAll(ic);
		if (convert_code_r(&conv_state, ic, root))
			break;
		free(fp);
	}

	return fp;
}
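/*
 * For illustration only (a hypothetical application, not part of this
 * file): the usual lifetime of the array returned here, as seen by a
 * program using libpcap, is to compile the filter, install it, and then
 * release it with pcap_freecode():
 *
 *	struct bpf_program prog;
 *
 *	if (pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN) == -1)
 *		report pcap_geterr(p) and bail out
 *	if (pcap_setfilter(p, &prog) == -1)
 *		report pcap_geterr(p) and bail out
 *	pcap_freecode(&prog);
 *
 * pcap_freecode() frees prog.bf_insns, i.e. the array allocated by this
 * routine; a program that omits that call shows the apparent "leak"
 * described above.
 */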
/*
 * For icode_to_fcode() errors.
 */
static void PCAP_NORETURN
conv_error(conv_state_t *conv_state, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void)pcap_vsnprintf(conv_state->errbuf,
	    PCAP_ERRBUF_SIZE, fmt, ap);
	va_end(ap);
	longjmp(conv_state->top_ctx, 1);
	/* NOTREACHED */
}

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
		    errno, "malloc");
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}
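/*
 * A minimal sketch (hypothetical, not a routine that exists in libpcap)
 * of how a capture module's set-filter operation might use
 * install_bpf_program() when it does its filtering in userland: the
 * program is simply copied into the pcap_t, and on failure the error has
 * already been reported through p->errbuf:
 *
 *	static int
 *	my_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		return (install_bpf_program(p, fp));
 *	}
 */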
#ifdef BDEBUG
static void
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)
{
	int icount, noffset;
	int i;

	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != VAL_UNKNOWN)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
	fprintf(out, "\"");
	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);
}

static void
dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
{
	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	if (JT(block)) {
		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
		    block->id, JT(block)->id);
		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
		    block->id, JF(block)->id);
	}
	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
}

/*
 * Output the block CFG using the graphviz/DOT language.
 * For each block in the CFG, the block's code, the value index of each
 * register at the block's exit, and the jump relationships are shown.
 *
 * An example DOT dump for the BPF filter `ip src host 1.1.1.1' is:
 *
    digraph BPF {
	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
	"block0":se -> "block1":n [label="T"];
	"block0":sw -> "block3":n [label="F"];
	"block1":se -> "block2":n [label="T"];
	"block1":sw -> "block3":n [label="F"];
    }
 *
 * After installing graphviz (http://www.graphviz.org/), save the output
 * as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
static int
dot_dump(struct icode *ic, char *errbuf)
{
	struct bpf_program f;
	FILE *out = stdout;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
	if (f.bf_insns == NULL)
		return -1;

	fprintf(out, "digraph BPF {\n");
	unMarkAll(ic);
	dot_dump_node(ic, ic->root, &f, out);
	unMarkAll(ic);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);
	return 0;
}

static int
plain_dump(struct icode *ic, char *errbuf)
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
	if (f.bf_insns == NULL)
		return -1;
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
	return 0;
}

static void
opt_dump(opt_state_t *opt_state, struct icode *ic)
{
	int status;
	char errbuf[PCAP_ERRBUF_SIZE];

	/*
	 * If the CFG, in DOT format, is requested, output it rather than
	 * the code that would be generated from that graph.
	 */
	if (pcap_print_dot_graph)
		status = dot_dump(ic, errbuf);
	else
		status = plain_dump(ic, errbuf);
	if (status == -1)
		opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
}
#endif
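/*
 * A sketch (hypothetical developer test code, only meaningful for a
 * libpcap built with BDEBUG defined) of how the DOT dump above might be
 * obtained: turn on the print-dot-graph flag before compiling a filter,
 *
 *	pcap_set_print_dot_graph(1);
 *	pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN);
 *
 * When the optimizer produces its debugging dumps, opt_dump() writes the
 * graph to the standard output; redirect that to bpf.dot and render it
 * with `dot -Tpng -O bpf.dot'.
 */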