1 /* $NetBSD: optimize.c,v 1.10 2018/09/03 15:26:43 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that: (1) source code distributions 9 * retain the above copyright notice and this paragraph in its entirety, (2) 10 * distributions including binary code include the above copyright notice and 11 * this paragraph in its entirety in the documentation or other materials 12 * provided with the distribution, and (3) all advertising materials mentioning 13 * features or use of this software display the following acknowledgement: 14 * ``This product includes software developed by the University of California, 15 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of 16 * the University nor the names of its contributors may be used to endorse 17 * or promote products derived from this software without specific prior 18 * written permission. 19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 20 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 22 * 23 * Optimization module for BPF code intermediate representation. 24 */ 25 26 #include <sys/cdefs.h> 27 __RCSID("$NetBSD: optimize.c,v 1.10 2018/09/03 15:26:43 christos Exp $"); 28 29 #ifdef HAVE_CONFIG_H 30 #include <config.h> 31 #endif 32 33 #include <pcap-types.h> 34 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <memory.h> 38 #include <string.h> 39 40 #include <errno.h> 41 42 #include "pcap-int.h" 43 44 #include "gencode.h" 45 #include "optimize.h" 46 47 #ifdef HAVE_OS_PROTO_H 48 #include "os-proto.h" 49 #endif 50 51 #ifdef BDEBUG 52 /* 53 * The internal "debug printout" flag for the filter expression optimizer. 54 * The code to print that stuff is present only if BDEBUG is defined, so 55 * the flag, and the routine to set it, are defined only if BDEBUG is 56 * defined. 57 */ 58 static int pcap_optimizer_debug; 59 60 /* 61 * Routine to set that flag. 62 * 63 * This is intended for libpcap developers, not for general use. 64 * If you want to set these in a program, you'll have to declare this 65 * routine yourself, with the appropriate DLL import attribute on Windows; 66 * it's not declared in any header file, and won't be declared in any 67 * header file provided by libpcap. 68 */ 69 PCAP_API void pcap_set_optimizer_debug(int value); 70 71 PCAP_API_DEF void 72 pcap_set_optimizer_debug(int value) 73 { 74 pcap_optimizer_debug = value; 75 } 76 77 /* 78 * The internal "print dot graph" flag for the filter expression optimizer. 79 * The code to print that stuff is present only if BDEBUG is defined, so 80 * the flag, and the routine to set it, are defined only if BDEBUG is 81 * defined. 82 */ 83 static int pcap_print_dot_graph; 84 85 /* 86 * Routine to set that flag. 87 * 88 * This is intended for libpcap developers, not for general use. 89 * If you want to set these in a program, you'll have to declare this 90 * routine yourself, with the appropriate DLL import attribute on Windows; 91 * it's not declared in any header file, and won't be declared in any 92 * header file provided by libpcap. 
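 *
 * For example, a debugging harness might do something like the
 * following before calling pcap_compile() (a sketch; on Windows the
 * declaration would also need the appropriate DLL import attribute):
 *
 *	extern void pcap_set_print_dot_graph(int);
 *
 *	pcap_set_print_dot_graph(1);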
 */
PCAP_API void pcap_set_print_dot_graph(int value);

PCAP_API_DEF void
pcap_set_print_dot_graph(int value)
{
	pcap_print_dot_graph = value;
}

#endif

/*
 * lowest_set_bit().
 *
 * Takes a 32-bit integer as an argument.
 *
 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
 *
 * If handed zero, the results are platform- and compiler-dependent.
 * Keep it out of the light, don't give it any water, don't feed it
 * after midnight, and don't pass zero to it.
 *
 * This is the same as the count of trailing zeroes in the word.
 */
#if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
/*
 * GCC 3.4 and later; we have __builtin_ctz().
 */
#define lowest_set_bit(mask) __builtin_ctz(mask)
#elif defined(_MSC_VER)
/*
 * Visual Studio; we support only 2005 and later, so use
 * _BitScanForward().
 */
#include <intrin.h>

#ifndef __clang__
#pragma intrinsic(_BitScanForward)
#endif

static __forceinline int
lowest_set_bit(int mask)
{
	unsigned long bit;

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
	return (int)bit;
}
#elif defined(MSDOS) && defined(__DJGPP__)
/*
 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
 * we've already included.
 */
#define lowest_set_bit(mask) (ffs((mask)) - 1)
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
/*
 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
 * or some other platform (UN*X conforming to a sufficiently recent version
 * of the Single UNIX Specification).
 */
#include <strings.h>
#define lowest_set_bit(mask) (ffs((mask)) - 1)
#else
/*
 * None of the above.
 * Use a perfect-hash-function-based function.
 */
static int
lowest_set_bit(int mask)
{
	unsigned int v = (unsigned int)mask;

	static const int MultiplyDeBruijnBitPosition[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};

	/*
	 * We strip off all but the lowermost set bit (v & -v),
	 * and perform a minimal perfect hash on it to look up the
	 * number of low-order zero bits in a table.
	 *
	 * See:
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
}
#endif

/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.
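 * (For example, every "ld #0x800" is assigned the same value number,
 * so the optimizer can tell that two such loads leave the same value
 * in the accumulator no matter where they appear.)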
Since the flowgraph is acyclic, 215 * exit values can be propagated from a node's predecessors 216 * provided it is uniquely defined. 217 */ 218 struct valnode { 219 int code; 220 int v0, v1; 221 int val; 222 struct valnode *next; 223 }; 224 225 /* Integer constants mapped with the load immediate opcode. */ 226 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L) 227 228 struct vmapinfo { 229 int is_const; 230 bpf_int32 const_val; 231 }; 232 233 typedef struct { 234 /* 235 * A flag to indicate that further optimization is needed. 236 * Iterative passes are continued until a given pass yields no 237 * branch movement. 238 */ 239 int done; 240 241 int n_blocks; 242 struct block **blocks; 243 int n_edges; 244 struct edge **edges; 245 246 /* 247 * A bit vector set representation of the dominators. 248 * We round up the set size to the next power of two. 249 */ 250 int nodewords; 251 int edgewords; 252 struct block **levels; 253 bpf_u_int32 *space; 254 255 #define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 256 /* 257 * True if a is in uset {p} 258 */ 259 #define SET_MEMBER(p, a) \ 260 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 261 262 /* 263 * Add 'a' to uset p. 264 */ 265 #define SET_INSERT(p, a) \ 266 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 267 268 /* 269 * Delete 'a' from uset p. 270 */ 271 #define SET_DELETE(p, a) \ 272 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 273 274 /* 275 * a := a intersect b 276 */ 277 #define SET_INTERSECT(a, b, n)\ 278 {\ 279 register bpf_u_int32 *_x = a, *_y = b;\ 280 register int _n = n;\ 281 while (--_n >= 0) *_x++ &= *_y++;\ 282 } 283 284 /* 285 * a := a - b 286 */ 287 #define SET_SUBTRACT(a, b, n)\ 288 {\ 289 register bpf_u_int32 *_x = a, *_y = b;\ 290 register int _n = n;\ 291 while (--_n >= 0) *_x++ &=~ *_y++;\ 292 } 293 294 /* 295 * a := a union b 296 */ 297 #define SET_UNION(a, b, n)\ 298 {\ 299 register bpf_u_int32 *_x = a, *_y = b;\ 300 register int _n = n;\ 301 while (--_n >= 0) *_x++ |= *_y++;\ 302 } 303 304 uset all_dom_sets; 305 uset all_closure_sets; 306 uset all_edge_sets; 307 308 #define MODULUS 213 309 struct valnode *hashtbl[MODULUS]; 310 int curval; 311 int maxval; 312 313 struct vmapinfo *vmap; 314 struct valnode *vnode_base; 315 struct valnode *next_vnode; 316 } opt_state_t; 317 318 typedef struct { 319 /* 320 * Some pointers used to convert the basic block form of the code, 321 * into the array form that BPF requires. 'fstart' will point to 322 * the malloc'd array while 'ftail' is used during the recursive 323 * traversal. 
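	 * ('ftail' starts just past the end of the array and is stepped
	 * backwards by convert_code_r(), one block at a time, so the
	 * instructions are filled in from the tail toward 'fstart'.)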
324 */ 325 struct bpf_insn *fstart; 326 struct bpf_insn *ftail; 327 } conv_state_t; 328 329 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *); 330 static void opt_cleanup(opt_state_t *); 331 332 static void intern_blocks(opt_state_t *, struct icode *); 333 334 static void find_inedges(opt_state_t *, struct block *); 335 #ifdef BDEBUG 336 static void opt_dump(compiler_state_t *, struct icode *); 337 #endif 338 339 #ifndef MAX 340 #define MAX(a,b) ((a)>(b)?(a):(b)) 341 #endif 342 343 static void 344 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b) 345 { 346 int level; 347 348 if (isMarked(ic, b)) 349 return; 350 351 Mark(ic, b); 352 b->link = 0; 353 354 if (JT(b)) { 355 find_levels_r(opt_state, ic, JT(b)); 356 find_levels_r(opt_state, ic, JF(b)); 357 level = MAX(JT(b)->level, JF(b)->level) + 1; 358 } else 359 level = 0; 360 b->level = level; 361 b->link = opt_state->levels[level]; 362 opt_state->levels[level] = b; 363 } 364 365 /* 366 * Level graph. The levels go from 0 at the leaves to 367 * N_LEVELS at the root. The opt_state->levels[] array points to the 368 * first node of the level list, whose elements are linked 369 * with the 'link' field of the struct block. 370 */ 371 static void 372 find_levels(opt_state_t *opt_state, struct icode *ic) 373 { 374 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels)); 375 unMarkAll(ic); 376 find_levels_r(opt_state, ic, ic->root); 377 } 378 379 /* 380 * Find dominator relationships. 381 * Assumes graph has been leveled. 382 */ 383 static void 384 find_dom(opt_state_t *opt_state, struct block *root) 385 { 386 int i; 387 struct block *b; 388 bpf_u_int32 *x; 389 390 /* 391 * Initialize sets to contain all nodes. 392 */ 393 x = opt_state->all_dom_sets; 394 i = opt_state->n_blocks * opt_state->nodewords; 395 while (--i >= 0) 396 *x++ = 0xFFFFFFFFU; 397 /* Root starts off empty. */ 398 for (i = opt_state->nodewords; --i >= 0;) 399 root->dom[i] = 0; 400 401 /* root->level is the highest level no found. */ 402 for (i = root->level; i >= 0; --i) { 403 for (b = opt_state->levels[i]; b; b = b->link) { 404 SET_INSERT(b->dom, b->id); 405 if (JT(b) == 0) 406 continue; 407 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords); 408 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords); 409 } 410 } 411 } 412 413 static void 414 propedom(opt_state_t *opt_state, struct edge *ep) 415 { 416 SET_INSERT(ep->edom, ep->id); 417 if (ep->succ) { 418 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords); 419 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords); 420 } 421 } 422 423 /* 424 * Compute edge dominators. 425 * Assumes graph has been leveled and predecessors established. 426 */ 427 static void 428 find_edom(opt_state_t *opt_state, struct block *root) 429 { 430 int i; 431 uset x; 432 struct block *b; 433 434 x = opt_state->all_edge_sets; 435 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; ) 436 x[i] = 0xFFFFFFFFU; 437 438 /* root->level is the highest level no found. */ 439 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0)); 440 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0)); 441 for (i = root->level; i >= 0; --i) { 442 for (b = opt_state->levels[i]; b != 0; b = b->link) { 443 propedom(opt_state, &b->et); 444 propedom(opt_state, &b->ef); 445 } 446 } 447 } 448 449 /* 450 * Find the backwards transitive closure of the flow graph. 
These sets 451 * are backwards in the sense that we find the set of nodes that reach 452 * a given node, not the set of nodes that can be reached by a node. 453 * 454 * Assumes graph has been leveled. 455 */ 456 static void 457 find_closure(opt_state_t *opt_state, struct block *root) 458 { 459 int i; 460 struct block *b; 461 462 /* 463 * Initialize sets to contain no nodes. 464 */ 465 memset((char *)opt_state->all_closure_sets, 0, 466 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets)); 467 468 /* root->level is the highest level no found. */ 469 for (i = root->level; i >= 0; --i) { 470 for (b = opt_state->levels[i]; b; b = b->link) { 471 SET_INSERT(b->closure, b->id); 472 if (JT(b) == 0) 473 continue; 474 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords); 475 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords); 476 } 477 } 478 } 479 480 /* 481 * Return the register number that is used by s. If A and X are both 482 * used, return AX_ATOM. If no register is used, return -1. 483 * 484 * The implementation should probably change to an array access. 485 */ 486 static int 487 atomuse(struct stmt *s) 488 { 489 register int c = s->code; 490 491 if (c == NOP) 492 return -1; 493 494 switch (BPF_CLASS(c)) { 495 496 case BPF_RET: 497 return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 498 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 499 500 case BPF_LD: 501 case BPF_LDX: 502 return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 503 (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 504 505 case BPF_ST: 506 return A_ATOM; 507 508 case BPF_STX: 509 return X_ATOM; 510 511 case BPF_JMP: 512 case BPF_ALU: 513 if (BPF_SRC(c) == BPF_X) 514 return AX_ATOM; 515 return A_ATOM; 516 517 case BPF_MISC: 518 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 519 } 520 abort(); 521 /* NOTREACHED */ 522 } 523 524 /* 525 * Return the register number that is defined by 's'. We assume that 526 * a single stmt cannot define more than one register. If no register 527 * is defined, return -1. 528 * 529 * The implementation should probably change to an array access. 530 */ 531 static int 532 atomdef(struct stmt *s) 533 { 534 if (s->code == NOP) 535 return -1; 536 537 switch (BPF_CLASS(s->code)) { 538 539 case BPF_LD: 540 case BPF_ALU: 541 return A_ATOM; 542 543 case BPF_LDX: 544 return X_ATOM; 545 546 case BPF_ST: 547 case BPF_STX: 548 return s->k; 549 550 case BPF_MISC: 551 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 552 } 553 return -1; 554 } 555 556 /* 557 * Compute the sets of registers used, defined, and killed by 'b'. 558 * 559 * "Used" means that a statement in 'b' uses the register before any 560 * statement in 'b' defines it, i.e. it uses the value left in 561 * that register by a predecessor block of this block. 562 * "Defined" means that a statement in 'b' defines it. 563 * "Killed" means that a statement in 'b' defines it before any 564 * statement in 'b' uses it, i.e. it kills the value left in that 565 * register by a predecessor block of this block. 
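 *
 * For example, if the first statement of a block is a "tax", the
 * accumulator is "used" (the tax reads whatever value a predecessor
 * left in A), while the X register is both "defined" and "killed"
 * (the tax overwrites it before anything in the block can read the
 * value a predecessor left there).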
566 */ 567 static void 568 compute_local_ud(struct block *b) 569 { 570 struct slist *s; 571 atomset def = 0, use = 0, killed = 0; 572 int atom; 573 574 for (s = b->stmts; s; s = s->next) { 575 if (s->s.code == NOP) 576 continue; 577 atom = atomuse(&s->s); 578 if (atom >= 0) { 579 if (atom == AX_ATOM) { 580 if (!ATOMELEM(def, X_ATOM)) 581 use |= ATOMMASK(X_ATOM); 582 if (!ATOMELEM(def, A_ATOM)) 583 use |= ATOMMASK(A_ATOM); 584 } 585 else if (atom < N_ATOMS) { 586 if (!ATOMELEM(def, atom)) 587 use |= ATOMMASK(atom); 588 } 589 else 590 abort(); 591 } 592 atom = atomdef(&s->s); 593 if (atom >= 0) { 594 if (!ATOMELEM(use, atom)) 595 killed |= ATOMMASK(atom); 596 def |= ATOMMASK(atom); 597 } 598 } 599 if (BPF_CLASS(b->s.code) == BPF_JMP) { 600 /* 601 * XXX - what about RET? 602 */ 603 atom = atomuse(&b->s); 604 if (atom >= 0) { 605 if (atom == AX_ATOM) { 606 if (!ATOMELEM(def, X_ATOM)) 607 use |= ATOMMASK(X_ATOM); 608 if (!ATOMELEM(def, A_ATOM)) 609 use |= ATOMMASK(A_ATOM); 610 } 611 else if (atom < N_ATOMS) { 612 if (!ATOMELEM(def, atom)) 613 use |= ATOMMASK(atom); 614 } 615 else 616 abort(); 617 } 618 } 619 620 b->def = def; 621 b->kill = killed; 622 b->in_use = use; 623 } 624 625 /* 626 * Assume graph is already leveled. 627 */ 628 static void 629 find_ud(opt_state_t *opt_state, struct block *root) 630 { 631 int i, maxlevel; 632 struct block *p; 633 634 /* 635 * root->level is the highest level no found; 636 * count down from there. 637 */ 638 maxlevel = root->level; 639 for (i = maxlevel; i >= 0; --i) 640 for (p = opt_state->levels[i]; p; p = p->link) { 641 compute_local_ud(p); 642 p->out_use = 0; 643 } 644 645 for (i = 1; i <= maxlevel; ++i) { 646 for (p = opt_state->levels[i]; p; p = p->link) { 647 p->out_use |= JT(p)->in_use | JF(p)->in_use; 648 p->in_use |= p->out_use &~ p->kill; 649 } 650 } 651 } 652 static void 653 init_val(opt_state_t *opt_state) 654 { 655 opt_state->curval = 0; 656 opt_state->next_vnode = opt_state->vnode_base; 657 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap)); 658 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl); 659 } 660 661 /* Because we really don't have an IR, this stuff is a little messy. */ 662 static int 663 F(opt_state_t *opt_state, int code, int v0, int v1) 664 { 665 u_int hash; 666 int val; 667 struct valnode *p; 668 669 hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8); 670 hash %= MODULUS; 671 672 for (p = opt_state->hashtbl[hash]; p; p = p->next) 673 if (p->code == code && p->v0 == v0 && p->v1 == v1) 674 return p->val; 675 676 val = ++opt_state->curval; 677 if (BPF_MODE(code) == BPF_IMM && 678 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 679 opt_state->vmap[val].const_val = v0; 680 opt_state->vmap[val].is_const = 1; 681 } 682 p = opt_state->next_vnode++; 683 p->val = val; 684 p->code = code; 685 p->v0 = v0; 686 p->v1 = v1; 687 p->next = opt_state->hashtbl[hash]; 688 opt_state->hashtbl[hash] = p; 689 690 return val; 691 } 692 693 static inline void 694 vstore(struct stmt *s, int *valp, int newval, int alter) 695 { 696 if (alter && newval != VAL_UNKNOWN && *valp == newval) 697 s->code = NOP; 698 else 699 *valp = newval; 700 } 701 702 /* 703 * Do constant-folding on binary operators. 704 * (Unary operators are handled elsewhere.) 
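 *
 * For example, if the accumulator's value is known to be 6 and the
 * statement is "mul #7", the statement is rewritten as "ld #42" and
 * nothing needs to be computed at run time.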
705 */ 706 static void 707 fold_op(compiler_state_t *cstate, opt_state_t *opt_state, 708 struct stmt *s, int v0, int v1) 709 { 710 bpf_u_int32 a, b; 711 712 a = opt_state->vmap[v0].const_val; 713 b = opt_state->vmap[v1].const_val; 714 715 switch (BPF_OP(s->code)) { 716 case BPF_ADD: 717 a += b; 718 break; 719 720 case BPF_SUB: 721 a -= b; 722 break; 723 724 case BPF_MUL: 725 a *= b; 726 break; 727 728 case BPF_DIV: 729 if (b == 0) 730 bpf_error(cstate, "division by zero"); 731 a /= b; 732 break; 733 734 case BPF_MOD: 735 if (b == 0) 736 bpf_error(cstate, "modulus by zero"); 737 a %= b; 738 break; 739 740 case BPF_AND: 741 a &= b; 742 break; 743 744 case BPF_OR: 745 a |= b; 746 break; 747 748 case BPF_XOR: 749 a ^= b; 750 break; 751 752 case BPF_LSH: 753 a <<= b; 754 break; 755 756 case BPF_RSH: 757 a >>= b; 758 break; 759 760 default: 761 abort(); 762 } 763 s->k = a; 764 s->code = BPF_LD|BPF_IMM; 765 opt_state->done = 0; 766 } 767 768 static inline struct slist * 769 this_op(struct slist *s) 770 { 771 while (s != 0 && s->s.code == NOP) 772 s = s->next; 773 return s; 774 } 775 776 static void 777 opt_not(struct block *b) 778 { 779 struct block *tmp = JT(b); 780 781 JT(b) = JF(b); 782 JF(b) = tmp; 783 } 784 785 static void 786 opt_peep(opt_state_t *opt_state, struct block *b) 787 { 788 struct slist *s; 789 struct slist *next, *last; 790 int val; 791 792 s = b->stmts; 793 if (s == 0) 794 return; 795 796 last = s; 797 for (/*empty*/; /*empty*/; s = next) { 798 /* 799 * Skip over nops. 800 */ 801 s = this_op(s); 802 if (s == 0) 803 break; /* nothing left in the block */ 804 805 /* 806 * Find the next real instruction after that one 807 * (skipping nops). 808 */ 809 next = this_op(s->next); 810 if (next == 0) 811 break; /* no next instruction */ 812 last = next; 813 814 /* 815 * st M[k] --> st M[k] 816 * ldx M[k] tax 817 */ 818 if (s->s.code == BPF_ST && 819 next->s.code == (BPF_LDX|BPF_MEM) && 820 s->s.k == next->s.k) { 821 opt_state->done = 0; 822 next->s.code = BPF_MISC|BPF_TAX; 823 } 824 /* 825 * ld #k --> ldx #k 826 * tax txa 827 */ 828 if (s->s.code == (BPF_LD|BPF_IMM) && 829 next->s.code == (BPF_MISC|BPF_TAX)) { 830 s->s.code = BPF_LDX|BPF_IMM; 831 next->s.code = BPF_MISC|BPF_TXA; 832 opt_state->done = 0; 833 } 834 /* 835 * This is an ugly special case, but it happens 836 * when you say tcp[k] or udp[k] where k is a constant. 837 */ 838 if (s->s.code == (BPF_LD|BPF_IMM)) { 839 struct slist *add, *tax, *ild; 840 841 /* 842 * Check that X isn't used on exit from this 843 * block (which the optimizer might cause). 844 * We know the code generator won't generate 845 * any local dependencies. 846 */ 847 if (ATOMELEM(b->out_use, X_ATOM)) 848 continue; 849 850 /* 851 * Check that the instruction following the ldi 852 * is an addx, or it's an ldxms with an addx 853 * following it (with 0 or more nops between the 854 * ldxms and addx). 855 */ 856 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 857 add = next; 858 else 859 add = this_op(next->next); 860 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 861 continue; 862 863 /* 864 * Check that a tax follows that (with 0 or more 865 * nops between them). 866 */ 867 tax = this_op(add->next); 868 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 869 continue; 870 871 /* 872 * Check that an ild follows that (with 0 or more 873 * nops between them). 
874 */ 875 ild = this_op(tax->next); 876 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 877 BPF_MODE(ild->s.code) != BPF_IND) 878 continue; 879 /* 880 * We want to turn this sequence: 881 * 882 * (004) ldi #0x2 {s} 883 * (005) ldxms [14] {next} -- optional 884 * (006) addx {add} 885 * (007) tax {tax} 886 * (008) ild [x+0] {ild} 887 * 888 * into this sequence: 889 * 890 * (004) nop 891 * (005) ldxms [14] 892 * (006) nop 893 * (007) nop 894 * (008) ild [x+2] 895 * 896 * XXX We need to check that X is not 897 * subsequently used, because we want to change 898 * what'll be in it after this sequence. 899 * 900 * We know we can eliminate the accumulator 901 * modifications earlier in the sequence since 902 * it is defined by the last stmt of this sequence 903 * (i.e., the last statement of the sequence loads 904 * a value into the accumulator, so we can eliminate 905 * earlier operations on the accumulator). 906 */ 907 ild->s.k += s->s.k; 908 s->s.code = NOP; 909 add->s.code = NOP; 910 tax->s.code = NOP; 911 opt_state->done = 0; 912 } 913 } 914 /* 915 * If the comparison at the end of a block is an equality 916 * comparison against a constant, and nobody uses the value 917 * we leave in the A register at the end of a block, and 918 * the operation preceding the comparison is an arithmetic 919 * operation, we can sometime optimize it away. 920 */ 921 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && 922 !ATOMELEM(b->out_use, A_ATOM)) { 923 /* 924 * We can optimize away certain subtractions of the 925 * X register. 926 */ 927 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { 928 val = b->val[X_ATOM]; 929 if (opt_state->vmap[val].is_const) { 930 /* 931 * If we have a subtract to do a comparison, 932 * and the X register is a known constant, 933 * we can merge this value into the 934 * comparison: 935 * 936 * sub x -> nop 937 * jeq #y jeq #(x+y) 938 */ 939 b->s.k += opt_state->vmap[val].const_val; 940 last->s.code = NOP; 941 opt_state->done = 0; 942 } else if (b->s.k == 0) { 943 /* 944 * If the X register isn't a constant, 945 * and the comparison in the test is 946 * against 0, we can compare with the 947 * X register, instead: 948 * 949 * sub x -> nop 950 * jeq #0 jeq x 951 */ 952 last->s.code = NOP; 953 b->s.code = BPF_JMP|BPF_JEQ|BPF_X; 954 opt_state->done = 0; 955 } 956 } 957 /* 958 * Likewise, a constant subtract can be simplified: 959 * 960 * sub #x -> nop 961 * jeq #y -> jeq #(x+y) 962 */ 963 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { 964 last->s.code = NOP; 965 b->s.k += last->s.k; 966 opt_state->done = 0; 967 } 968 /* 969 * And, similarly, a constant AND can be simplified 970 * if we're testing against 0, i.e.: 971 * 972 * and #k nop 973 * jeq #0 -> jset #k 974 */ 975 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 976 b->s.k == 0) { 977 b->s.k = last->s.k; 978 b->s.code = BPF_JMP|BPF_K|BPF_JSET; 979 last->s.code = NOP; 980 opt_state->done = 0; 981 opt_not(b); 982 } 983 } 984 /* 985 * jset #0 -> never 986 * jset #ffffffff -> always 987 */ 988 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 989 if (b->s.k == 0) 990 JT(b) = JF(b); 991 if ((u_int)b->s.k == 0xffffffffU) 992 JF(b) = JT(b); 993 } 994 /* 995 * If we're comparing against the index register, and the index 996 * register is a known constant, we can just compare against that 997 * constant. 
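	 * For example, if the X register is known to contain 20:
	 *
	 *	jeq x	->	jeq #20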
998 */ 999 val = b->val[X_ATOM]; 1000 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { 1001 bpf_int32 v = opt_state->vmap[val].const_val; 1002 b->s.code &= ~BPF_X; 1003 b->s.k = v; 1004 } 1005 /* 1006 * If the accumulator is a known constant, we can compute the 1007 * comparison result. 1008 */ 1009 val = b->val[A_ATOM]; 1010 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 1011 bpf_int32 v = opt_state->vmap[val].const_val; 1012 switch (BPF_OP(b->s.code)) { 1013 1014 case BPF_JEQ: 1015 v = v == b->s.k; 1016 break; 1017 1018 case BPF_JGT: 1019 v = (unsigned)v > (unsigned)b->s.k; 1020 break; 1021 1022 case BPF_JGE: 1023 v = (unsigned)v >= (unsigned)b->s.k; 1024 break; 1025 1026 case BPF_JSET: 1027 v &= b->s.k; 1028 break; 1029 1030 default: 1031 abort(); 1032 } 1033 if (JF(b) != JT(b)) 1034 opt_state->done = 0; 1035 if (v) 1036 JF(b) = JT(b); 1037 else 1038 JT(b) = JF(b); 1039 } 1040 } 1041 1042 /* 1043 * Compute the symbolic value of expression of 's', and update 1044 * anything it defines in the value table 'val'. If 'alter' is true, 1045 * do various optimizations. This code would be cleaner if symbolic 1046 * evaluation and code transformations weren't folded together. 1047 */ 1048 static void 1049 opt_stmt(compiler_state_t *cstate, opt_state_t *opt_state, 1050 struct stmt *s, int val[], int alter) 1051 { 1052 int op; 1053 int v; 1054 1055 switch (s->code) { 1056 1057 case BPF_LD|BPF_ABS|BPF_W: 1058 case BPF_LD|BPF_ABS|BPF_H: 1059 case BPF_LD|BPF_ABS|BPF_B: 1060 v = F(opt_state, s->code, s->k, 0L); 1061 vstore(s, &val[A_ATOM], v, alter); 1062 break; 1063 1064 case BPF_LD|BPF_IND|BPF_W: 1065 case BPF_LD|BPF_IND|BPF_H: 1066 case BPF_LD|BPF_IND|BPF_B: 1067 v = val[X_ATOM]; 1068 if (alter && opt_state->vmap[v].is_const) { 1069 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 1070 s->k += opt_state->vmap[v].const_val; 1071 v = F(opt_state, s->code, s->k, 0L); 1072 opt_state->done = 0; 1073 } 1074 else 1075 v = F(opt_state, s->code, s->k, v); 1076 vstore(s, &val[A_ATOM], v, alter); 1077 break; 1078 1079 case BPF_LD|BPF_LEN: 1080 v = F(opt_state, s->code, 0L, 0L); 1081 vstore(s, &val[A_ATOM], v, alter); 1082 break; 1083 1084 case BPF_LD|BPF_IMM: 1085 v = K(s->k); 1086 vstore(s, &val[A_ATOM], v, alter); 1087 break; 1088 1089 case BPF_LDX|BPF_IMM: 1090 v = K(s->k); 1091 vstore(s, &val[X_ATOM], v, alter); 1092 break; 1093 1094 case BPF_LDX|BPF_MSH|BPF_B: 1095 v = F(opt_state, s->code, s->k, 0L); 1096 vstore(s, &val[X_ATOM], v, alter); 1097 break; 1098 1099 case BPF_ALU|BPF_NEG: 1100 if (alter && opt_state->vmap[val[A_ATOM]].is_const) { 1101 s->code = BPF_LD|BPF_IMM; 1102 s->k = -opt_state->vmap[val[A_ATOM]].const_val; 1103 val[A_ATOM] = K(s->k); 1104 } 1105 else 1106 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L); 1107 break; 1108 1109 case BPF_ALU|BPF_ADD|BPF_K: 1110 case BPF_ALU|BPF_SUB|BPF_K: 1111 case BPF_ALU|BPF_MUL|BPF_K: 1112 case BPF_ALU|BPF_DIV|BPF_K: 1113 case BPF_ALU|BPF_MOD|BPF_K: 1114 case BPF_ALU|BPF_AND|BPF_K: 1115 case BPF_ALU|BPF_OR|BPF_K: 1116 case BPF_ALU|BPF_XOR|BPF_K: 1117 case BPF_ALU|BPF_LSH|BPF_K: 1118 case BPF_ALU|BPF_RSH|BPF_K: 1119 op = BPF_OP(s->code); 1120 if (alter) { 1121 if (s->k == 0) { 1122 /* don't optimize away "sub #0" 1123 * as it may be needed later to 1124 * fixup the generated math code */ 1125 if (op == BPF_ADD || 1126 op == BPF_LSH || op == BPF_RSH || 1127 op == BPF_OR || op == BPF_XOR) { 1128 s->code = NOP; 1129 break; 1130 } 1131 if (op == BPF_MUL || op == BPF_AND) { 1132 s->code = BPF_LD|BPF_IMM; 1133 val[A_ATOM] 
= K(s->k); 1134 break; 1135 } 1136 } 1137 if (opt_state->vmap[val[A_ATOM]].is_const) { 1138 fold_op(cstate, opt_state, s, val[A_ATOM], K(s->k)); 1139 val[A_ATOM] = K(s->k); 1140 break; 1141 } 1142 } 1143 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k)); 1144 break; 1145 1146 case BPF_ALU|BPF_ADD|BPF_X: 1147 case BPF_ALU|BPF_SUB|BPF_X: 1148 case BPF_ALU|BPF_MUL|BPF_X: 1149 case BPF_ALU|BPF_DIV|BPF_X: 1150 case BPF_ALU|BPF_MOD|BPF_X: 1151 case BPF_ALU|BPF_AND|BPF_X: 1152 case BPF_ALU|BPF_OR|BPF_X: 1153 case BPF_ALU|BPF_XOR|BPF_X: 1154 case BPF_ALU|BPF_LSH|BPF_X: 1155 case BPF_ALU|BPF_RSH|BPF_X: 1156 op = BPF_OP(s->code); 1157 if (alter && opt_state->vmap[val[X_ATOM]].is_const) { 1158 if (opt_state->vmap[val[A_ATOM]].is_const) { 1159 fold_op(cstate, opt_state, s, val[A_ATOM], val[X_ATOM]); 1160 val[A_ATOM] = K(s->k); 1161 } 1162 else { 1163 s->code = BPF_ALU|BPF_K|op; 1164 s->k = opt_state->vmap[val[X_ATOM]].const_val; 1165 opt_state->done = 0; 1166 val[A_ATOM] = 1167 F(opt_state, s->code, val[A_ATOM], K(s->k)); 1168 } 1169 break; 1170 } 1171 /* 1172 * Check if we're doing something to an accumulator 1173 * that is 0, and simplify. This may not seem like 1174 * much of a simplification but it could open up further 1175 * optimizations. 1176 * XXX We could also check for mul by 1, etc. 1177 */ 1178 if (alter && opt_state->vmap[val[A_ATOM]].is_const 1179 && opt_state->vmap[val[A_ATOM]].const_val == 0) { 1180 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) { 1181 s->code = BPF_MISC|BPF_TXA; 1182 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1183 break; 1184 } 1185 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD || 1186 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { 1187 s->code = BPF_LD|BPF_IMM; 1188 s->k = 0; 1189 vstore(s, &val[A_ATOM], K(s->k), alter); 1190 break; 1191 } 1192 else if (op == BPF_NEG) { 1193 s->code = NOP; 1194 break; 1195 } 1196 } 1197 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]); 1198 break; 1199 1200 case BPF_MISC|BPF_TXA: 1201 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1202 break; 1203 1204 case BPF_LD|BPF_MEM: 1205 v = val[s->k]; 1206 if (alter && opt_state->vmap[v].is_const) { 1207 s->code = BPF_LD|BPF_IMM; 1208 s->k = opt_state->vmap[v].const_val; 1209 opt_state->done = 0; 1210 } 1211 vstore(s, &val[A_ATOM], v, alter); 1212 break; 1213 1214 case BPF_MISC|BPF_TAX: 1215 vstore(s, &val[X_ATOM], val[A_ATOM], alter); 1216 break; 1217 1218 case BPF_LDX|BPF_MEM: 1219 v = val[s->k]; 1220 if (alter && opt_state->vmap[v].is_const) { 1221 s->code = BPF_LDX|BPF_IMM; 1222 s->k = opt_state->vmap[v].const_val; 1223 opt_state->done = 0; 1224 } 1225 vstore(s, &val[X_ATOM], v, alter); 1226 break; 1227 1228 case BPF_ST: 1229 vstore(s, &val[s->k], val[A_ATOM], alter); 1230 break; 1231 1232 case BPF_STX: 1233 vstore(s, &val[s->k], val[X_ATOM], alter); 1234 break; 1235 } 1236 } 1237 1238 static void 1239 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[]) 1240 { 1241 register int atom; 1242 1243 atom = atomuse(s); 1244 if (atom >= 0) { 1245 if (atom == AX_ATOM) { 1246 last[X_ATOM] = 0; 1247 last[A_ATOM] = 0; 1248 } 1249 else 1250 last[atom] = 0; 1251 } 1252 atom = atomdef(s); 1253 if (atom >= 0) { 1254 if (last[atom]) { 1255 opt_state->done = 0; 1256 last[atom]->code = NOP; 1257 } 1258 last[atom] = s; 1259 } 1260 } 1261 1262 static void 1263 opt_deadstores(opt_state_t *opt_state, register struct block *b) 1264 { 1265 register struct slist *s; 1266 register int atom; 1267 struct stmt *last[N_ATOMS]; 1268 1269 
memset((char *)last, 0, sizeof last); 1270 1271 for (s = b->stmts; s != 0; s = s->next) 1272 deadstmt(opt_state, &s->s, last); 1273 deadstmt(opt_state, &b->s, last); 1274 1275 for (atom = 0; atom < N_ATOMS; ++atom) 1276 if (last[atom] && !ATOMELEM(b->out_use, atom)) { 1277 last[atom]->code = NOP; 1278 opt_state->done = 0; 1279 } 1280 } 1281 1282 static void 1283 opt_blk(compiler_state_t *cstate, opt_state_t *opt_state, 1284 struct block *b, int do_stmts) 1285 { 1286 struct slist *s; 1287 struct edge *p; 1288 int i; 1289 bpf_int32 aval, xval; 1290 1291 #if 0 1292 for (s = b->stmts; s && s->next; s = s->next) 1293 if (BPF_CLASS(s->s.code) == BPF_JMP) { 1294 do_stmts = 0; 1295 break; 1296 } 1297 #endif 1298 1299 /* 1300 * Initialize the atom values. 1301 */ 1302 p = b->in_edges; 1303 if (p == 0) { 1304 /* 1305 * We have no predecessors, so everything is undefined 1306 * upon entry to this block. 1307 */ 1308 memset((char *)b->val, 0, sizeof(b->val)); 1309 } else { 1310 /* 1311 * Inherit values from our predecessors. 1312 * 1313 * First, get the values from the predecessor along the 1314 * first edge leading to this node. 1315 */ 1316 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 1317 /* 1318 * Now look at all the other nodes leading to this node. 1319 * If, for the predecessor along that edge, a register 1320 * has a different value from the one we have (i.e., 1321 * control paths are merging, and the merging paths 1322 * assign different values to that register), give the 1323 * register the undefined value of 0. 1324 */ 1325 while ((p = p->next) != NULL) { 1326 for (i = 0; i < N_ATOMS; ++i) 1327 if (b->val[i] != p->pred->val[i]) 1328 b->val[i] = 0; 1329 } 1330 } 1331 aval = b->val[A_ATOM]; 1332 xval = b->val[X_ATOM]; 1333 for (s = b->stmts; s; s = s->next) 1334 opt_stmt(cstate, opt_state, &s->s, b->val, do_stmts); 1335 1336 /* 1337 * This is a special case: if we don't use anything from this 1338 * block, and we load the accumulator or index register with a 1339 * value that is already there, or if this block is a return, 1340 * eliminate all the statements. 1341 * 1342 * XXX - what if it does a store? 1343 * 1344 * XXX - why does it matter whether we use anything from this 1345 * block? If the accumulator or index register doesn't change 1346 * its value, isn't that OK even if we use that value? 1347 * 1348 * XXX - if we load the accumulator with a different value, 1349 * and the block ends with a conditional branch, we obviously 1350 * can't eliminate it, as the branch depends on that value. 1351 * For the index register, the conditional branch only depends 1352 * on the index register value if the test is against the index 1353 * register value rather than a constant; if nothing uses the 1354 * value we put into the index register, and we're not testing 1355 * against the index register's value, and there aren't any 1356 * other problems that would keep us from eliminating this 1357 * block, can we eliminate it? 1358 */ 1359 if (do_stmts && 1360 ((b->out_use == 0 && 1361 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval && 1362 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) || 1363 BPF_CLASS(b->s.code) == BPF_RET)) { 1364 if (b->stmts != 0) { 1365 b->stmts = 0; 1366 opt_state->done = 0; 1367 } 1368 } else { 1369 opt_peep(opt_state, b); 1370 opt_deadstores(opt_state, b); 1371 } 1372 /* 1373 * Set up values for branch optimizer. 
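	 * b->oval is the value number of the operand the branch compares
	 * against (the constant k, or whatever is in X), and the false
	 * edge records the negated opcode so that fold_edge() can tell
	 * whether arriving along an edge means the test succeeded or
	 * failed.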
1374 */ 1375 if (BPF_SRC(b->s.code) == BPF_K) 1376 b->oval = K(b->s.k); 1377 else 1378 b->oval = b->val[X_ATOM]; 1379 b->et.code = b->s.code; 1380 b->ef.code = -b->s.code; 1381 } 1382 1383 /* 1384 * Return true if any register that is used on exit from 'succ', has 1385 * an exit value that is different from the corresponding exit value 1386 * from 'b'. 1387 */ 1388 static int 1389 use_conflict(struct block *b, struct block *succ) 1390 { 1391 int atom; 1392 atomset use = succ->out_use; 1393 1394 if (use == 0) 1395 return 0; 1396 1397 for (atom = 0; atom < N_ATOMS; ++atom) 1398 if (ATOMELEM(use, atom)) 1399 if (b->val[atom] != succ->val[atom]) 1400 return 1; 1401 return 0; 1402 } 1403 1404 static struct block * 1405 fold_edge(struct block *child, struct edge *ep) 1406 { 1407 int sense; 1408 int aval0, aval1, oval0, oval1; 1409 int code = ep->code; 1410 1411 if (code < 0) { 1412 code = -code; 1413 sense = 0; 1414 } else 1415 sense = 1; 1416 1417 if (child->s.code != code) 1418 return 0; 1419 1420 aval0 = child->val[A_ATOM]; 1421 oval0 = child->oval; 1422 aval1 = ep->pred->val[A_ATOM]; 1423 oval1 = ep->pred->oval; 1424 1425 if (aval0 != aval1) 1426 return 0; 1427 1428 if (oval0 == oval1) 1429 /* 1430 * The operands of the branch instructions are 1431 * identical, so the result is true if a true 1432 * branch was taken to get here, otherwise false. 1433 */ 1434 return sense ? JT(child) : JF(child); 1435 1436 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K)) 1437 /* 1438 * At this point, we only know the comparison if we 1439 * came down the true branch, and it was an equality 1440 * comparison with a constant. 1441 * 1442 * I.e., if we came down the true branch, and the branch 1443 * was an equality comparison with a constant, we know the 1444 * accumulator contains that constant. If we came down 1445 * the false branch, or the comparison wasn't with a 1446 * constant, we don't know what was in the accumulator. 1447 * 1448 * We rely on the fact that distinct constants have distinct 1449 * value numbers. 1450 */ 1451 return JF(child); 1452 1453 return 0; 1454 } 1455 1456 static void 1457 opt_j(opt_state_t *opt_state, struct edge *ep) 1458 { 1459 register int i, k; 1460 register struct block *target; 1461 1462 if (JT(ep->succ) == 0) 1463 return; 1464 1465 if (JT(ep->succ) == JF(ep->succ)) { 1466 /* 1467 * Common branch targets can be eliminated, provided 1468 * there is no data dependency. 1469 */ 1470 if (!use_conflict(ep->pred, ep->succ->et.succ)) { 1471 opt_state->done = 0; 1472 ep->succ = JT(ep->succ); 1473 } 1474 } 1475 /* 1476 * For each edge dominator that matches the successor of this 1477 * edge, promote the edge successor to the its grandchild. 1478 * 1479 * XXX We violate the set abstraction here in favor a reasonably 1480 * efficient loop. 1481 */ 1482 top: 1483 for (i = 0; i < opt_state->edgewords; ++i) { 1484 register bpf_u_int32 x = ep->edom[i]; 1485 1486 while (x != 0) { 1487 k = lowest_set_bit(x); 1488 x &=~ (1 << k); 1489 k += i * BITS_PER_WORD; 1490 1491 target = fold_edge(ep->succ, opt_state->edges[k]); 1492 /* 1493 * Check that there is no data dependency between 1494 * nodes that will be violated if we move the edge. 1495 */ 1496 if (target != 0 && !use_conflict(ep->pred, target)) { 1497 opt_state->done = 0; 1498 ep->succ = target; 1499 if (JT(target) != 0) 1500 /* 1501 * Start over unless we hit a leaf. 
1502 */ 1503 goto top; 1504 return; 1505 } 1506 } 1507 } 1508 } 1509 1510 1511 static void 1512 or_pullup(opt_state_t *opt_state, struct block *b) 1513 { 1514 int val, at_top; 1515 struct block *pull; 1516 struct block **diffp, **samep; 1517 struct edge *ep; 1518 1519 ep = b->in_edges; 1520 if (ep == 0) 1521 return; 1522 1523 /* 1524 * Make sure each predecessor loads the same value. 1525 * XXX why? 1526 */ 1527 val = ep->pred->val[A_ATOM]; 1528 for (ep = ep->next; ep != 0; ep = ep->next) 1529 if (val != ep->pred->val[A_ATOM]) 1530 return; 1531 1532 if (JT(b->in_edges->pred) == b) 1533 diffp = &JT(b->in_edges->pred); 1534 else 1535 diffp = &JF(b->in_edges->pred); 1536 1537 at_top = 1; 1538 for (;;) { 1539 if (*diffp == 0) 1540 return; 1541 1542 if (JT(*diffp) != JT(b)) 1543 return; 1544 1545 if (!SET_MEMBER((*diffp)->dom, b->id)) 1546 return; 1547 1548 if ((*diffp)->val[A_ATOM] != val) 1549 break; 1550 1551 diffp = &JF(*diffp); 1552 at_top = 0; 1553 } 1554 samep = &JF(*diffp); 1555 for (;;) { 1556 if (*samep == 0) 1557 return; 1558 1559 if (JT(*samep) != JT(b)) 1560 return; 1561 1562 if (!SET_MEMBER((*samep)->dom, b->id)) 1563 return; 1564 1565 if ((*samep)->val[A_ATOM] == val) 1566 break; 1567 1568 /* XXX Need to check that there are no data dependencies 1569 between dp0 and dp1. Currently, the code generator 1570 will not produce such dependencies. */ 1571 samep = &JF(*samep); 1572 } 1573 #ifdef notdef 1574 /* XXX This doesn't cover everything. */ 1575 for (i = 0; i < N_ATOMS; ++i) 1576 if ((*samep)->val[i] != pred->val[i]) 1577 return; 1578 #endif 1579 /* Pull up the node. */ 1580 pull = *samep; 1581 *samep = JF(pull); 1582 JF(pull) = *diffp; 1583 1584 /* 1585 * At the top of the chain, each predecessor needs to point at the 1586 * pulled up node. Inside the chain, there is only one predecessor 1587 * to worry about. 1588 */ 1589 if (at_top) { 1590 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1591 if (JT(ep->pred) == b) 1592 JT(ep->pred) = pull; 1593 else 1594 JF(ep->pred) = pull; 1595 } 1596 } 1597 else 1598 *diffp = pull; 1599 1600 opt_state->done = 0; 1601 } 1602 1603 static void 1604 and_pullup(opt_state_t *opt_state, struct block *b) 1605 { 1606 int val, at_top; 1607 struct block *pull; 1608 struct block **diffp, **samep; 1609 struct edge *ep; 1610 1611 ep = b->in_edges; 1612 if (ep == 0) 1613 return; 1614 1615 /* 1616 * Make sure each predecessor loads the same value. 1617 */ 1618 val = ep->pred->val[A_ATOM]; 1619 for (ep = ep->next; ep != 0; ep = ep->next) 1620 if (val != ep->pred->val[A_ATOM]) 1621 return; 1622 1623 if (JT(b->in_edges->pred) == b) 1624 diffp = &JT(b->in_edges->pred); 1625 else 1626 diffp = &JF(b->in_edges->pred); 1627 1628 at_top = 1; 1629 for (;;) { 1630 if (*diffp == 0) 1631 return; 1632 1633 if (JF(*diffp) != JF(b)) 1634 return; 1635 1636 if (!SET_MEMBER((*diffp)->dom, b->id)) 1637 return; 1638 1639 if ((*diffp)->val[A_ATOM] != val) 1640 break; 1641 1642 diffp = &JT(*diffp); 1643 at_top = 0; 1644 } 1645 samep = &JT(*diffp); 1646 for (;;) { 1647 if (*samep == 0) 1648 return; 1649 1650 if (JF(*samep) != JF(b)) 1651 return; 1652 1653 if (!SET_MEMBER((*samep)->dom, b->id)) 1654 return; 1655 1656 if ((*samep)->val[A_ATOM] == val) 1657 break; 1658 1659 /* XXX Need to check that there are no data dependencies 1660 between diffp and samep. Currently, the code generator 1661 will not produce such dependencies. */ 1662 samep = &JT(*samep); 1663 } 1664 #ifdef notdef 1665 /* XXX This doesn't cover everything. 
*/ 1666 for (i = 0; i < N_ATOMS; ++i) 1667 if ((*samep)->val[i] != pred->val[i]) 1668 return; 1669 #endif 1670 /* Pull up the node. */ 1671 pull = *samep; 1672 *samep = JT(pull); 1673 JT(pull) = *diffp; 1674 1675 /* 1676 * At the top of the chain, each predecessor needs to point at the 1677 * pulled up node. Inside the chain, there is only one predecessor 1678 * to worry about. 1679 */ 1680 if (at_top) { 1681 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1682 if (JT(ep->pred) == b) 1683 JT(ep->pred) = pull; 1684 else 1685 JF(ep->pred) = pull; 1686 } 1687 } 1688 else 1689 *diffp = pull; 1690 1691 opt_state->done = 0; 1692 } 1693 1694 static void 1695 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic, 1696 int do_stmts) 1697 { 1698 int i, maxlevel; 1699 struct block *p; 1700 1701 init_val(opt_state); 1702 maxlevel = ic->root->level; 1703 1704 find_inedges(opt_state, ic->root); 1705 for (i = maxlevel; i >= 0; --i) 1706 for (p = opt_state->levels[i]; p; p = p->link) 1707 opt_blk(cstate, opt_state, p, do_stmts); 1708 1709 if (do_stmts) 1710 /* 1711 * No point trying to move branches; it can't possibly 1712 * make a difference at this point. 1713 */ 1714 return; 1715 1716 for (i = 1; i <= maxlevel; ++i) { 1717 for (p = opt_state->levels[i]; p; p = p->link) { 1718 opt_j(opt_state, &p->et); 1719 opt_j(opt_state, &p->ef); 1720 } 1721 } 1722 1723 find_inedges(opt_state, ic->root); 1724 for (i = 1; i <= maxlevel; ++i) { 1725 for (p = opt_state->levels[i]; p; p = p->link) { 1726 or_pullup(opt_state, p); 1727 and_pullup(opt_state, p); 1728 } 1729 } 1730 } 1731 1732 static inline void 1733 link_inedge(struct edge *parent, struct block *child) 1734 { 1735 parent->next = child->in_edges; 1736 child->in_edges = parent; 1737 } 1738 1739 static void 1740 find_inedges(opt_state_t *opt_state, struct block *root) 1741 { 1742 int i; 1743 struct block *b; 1744 1745 for (i = 0; i < opt_state->n_blocks; ++i) 1746 opt_state->blocks[i]->in_edges = 0; 1747 1748 /* 1749 * Traverse the graph, adding each edge to the predecessor 1750 * list of its successors. Skip the leaves (i.e. level 0). 1751 */ 1752 for (i = root->level; i > 0; --i) { 1753 for (b = opt_state->levels[i]; b != 0; b = b->link) { 1754 link_inedge(&b->et, JT(b)); 1755 link_inedge(&b->ef, JF(b)); 1756 } 1757 } 1758 } 1759 1760 static void 1761 opt_root(struct block **b) 1762 { 1763 struct slist *tmp, *s; 1764 1765 s = (*b)->stmts; 1766 (*b)->stmts = 0; 1767 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b)) 1768 *b = JT(*b); 1769 1770 tmp = (*b)->stmts; 1771 if (tmp != 0) 1772 sappend(s, tmp); 1773 (*b)->stmts = s; 1774 1775 /* 1776 * If the root node is a return, then there is no 1777 * point executing any statements (since the bpf machine 1778 * has no side effects). 
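	 * (A filter that has collapsed to a single "ret #0", for example,
	 * need not load anything from the packet first.)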
1779 */ 1780 if (BPF_CLASS((*b)->s.code) == BPF_RET) 1781 (*b)->stmts = 0; 1782 } 1783 1784 static void 1785 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic, 1786 int do_stmts) 1787 { 1788 1789 #ifdef BDEBUG 1790 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { 1791 printf("opt_loop(root, %d) begin\n", do_stmts); 1792 opt_dump(cstate, ic); 1793 } 1794 #endif 1795 do { 1796 opt_state->done = 1; 1797 find_levels(opt_state, ic); 1798 find_dom(opt_state, ic->root); 1799 find_closure(opt_state, ic->root); 1800 find_ud(opt_state, ic->root); 1801 find_edom(opt_state, ic->root); 1802 opt_blks(cstate, opt_state, ic, do_stmts); 1803 #ifdef BDEBUG 1804 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { 1805 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done); 1806 opt_dump(cstate, ic); 1807 } 1808 #endif 1809 } while (!opt_state->done); 1810 } 1811 1812 /* 1813 * Optimize the filter code in its dag representation. 1814 */ 1815 void 1816 bpf_optimize(compiler_state_t *cstate, struct icode *ic) 1817 { 1818 opt_state_t opt_state; 1819 1820 opt_init(cstate, &opt_state, ic); 1821 opt_loop(cstate, &opt_state, ic, 0); 1822 opt_loop(cstate, &opt_state, ic, 1); 1823 intern_blocks(&opt_state, ic); 1824 #ifdef BDEBUG 1825 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { 1826 printf("after intern_blocks()\n"); 1827 opt_dump(cstate, ic); 1828 } 1829 #endif 1830 opt_root(&ic->root); 1831 #ifdef BDEBUG 1832 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { 1833 printf("after opt_root()\n"); 1834 opt_dump(cstate, ic); 1835 } 1836 #endif 1837 opt_cleanup(&opt_state); 1838 } 1839 1840 static void 1841 make_marks(struct icode *ic, struct block *p) 1842 { 1843 if (!isMarked(ic, p)) { 1844 Mark(ic, p); 1845 if (BPF_CLASS(p->s.code) != BPF_RET) { 1846 make_marks(ic, JT(p)); 1847 make_marks(ic, JF(p)); 1848 } 1849 } 1850 } 1851 1852 /* 1853 * Mark code array such that isMarked(ic->cur_mark, i) is true 1854 * only for nodes that are alive. 1855 */ 1856 static void 1857 mark_code(struct icode *ic) 1858 { 1859 ic->cur_mark += 1; 1860 make_marks(ic, ic->root); 1861 } 1862 1863 /* 1864 * True iff the two stmt lists load the same value from the packet into 1865 * the accumulator. 
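 * (NOPs are skipped on both sides, so lists that differ only in
 * instructions the optimizer has already deleted still compare equal.)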
1866 */ 1867 static int 1868 eq_slist(struct slist *x, struct slist *y) 1869 { 1870 for (;;) { 1871 while (x && x->s.code == NOP) 1872 x = x->next; 1873 while (y && y->s.code == NOP) 1874 y = y->next; 1875 if (x == 0) 1876 return y == 0; 1877 if (y == 0) 1878 return x == 0; 1879 if (x->s.code != y->s.code || x->s.k != y->s.k) 1880 return 0; 1881 x = x->next; 1882 y = y->next; 1883 } 1884 } 1885 1886 static inline int 1887 eq_blk(struct block *b0, struct block *b1) 1888 { 1889 if (b0->s.code == b1->s.code && 1890 b0->s.k == b1->s.k && 1891 b0->et.succ == b1->et.succ && 1892 b0->ef.succ == b1->ef.succ) 1893 return eq_slist(b0->stmts, b1->stmts); 1894 return 0; 1895 } 1896 1897 static void 1898 intern_blocks(opt_state_t *opt_state, struct icode *ic) 1899 { 1900 struct block *p; 1901 int i, j; 1902 int done1; /* don't shadow global */ 1903 top: 1904 done1 = 1; 1905 for (i = 0; i < opt_state->n_blocks; ++i) 1906 opt_state->blocks[i]->link = 0; 1907 1908 mark_code(ic); 1909 1910 for (i = opt_state->n_blocks - 1; --i >= 0; ) { 1911 if (!isMarked(ic, opt_state->blocks[i])) 1912 continue; 1913 for (j = i + 1; j < opt_state->n_blocks; ++j) { 1914 if (!isMarked(ic, opt_state->blocks[j])) 1915 continue; 1916 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) { 1917 opt_state->blocks[i]->link = opt_state->blocks[j]->link ? 1918 opt_state->blocks[j]->link : opt_state->blocks[j]; 1919 break; 1920 } 1921 } 1922 } 1923 for (i = 0; i < opt_state->n_blocks; ++i) { 1924 p = opt_state->blocks[i]; 1925 if (JT(p) == 0) 1926 continue; 1927 if (JT(p)->link) { 1928 done1 = 0; 1929 JT(p) = JT(p)->link; 1930 } 1931 if (JF(p)->link) { 1932 done1 = 0; 1933 JF(p) = JF(p)->link; 1934 } 1935 } 1936 if (!done1) 1937 goto top; 1938 } 1939 1940 static void 1941 opt_cleanup(opt_state_t *opt_state) 1942 { 1943 free((void *)opt_state->vnode_base); 1944 free((void *)opt_state->vmap); 1945 free((void *)opt_state->edges); 1946 free((void *)opt_state->space); 1947 free((void *)opt_state->levels); 1948 free((void *)opt_state->blocks); 1949 } 1950 1951 /* 1952 * Return the number of stmts in 's'. 1953 */ 1954 static u_int 1955 slength(struct slist *s) 1956 { 1957 u_int n = 0; 1958 1959 for (; s; s = s->next) 1960 if (s->s.code != NOP) 1961 ++n; 1962 return n; 1963 } 1964 1965 /* 1966 * Return the number of nodes reachable by 'p'. 1967 * All nodes should be initially unmarked. 1968 */ 1969 static int 1970 count_blocks(struct icode *ic, struct block *p) 1971 { 1972 if (p == 0 || isMarked(ic, p)) 1973 return 0; 1974 Mark(ic, p); 1975 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1; 1976 } 1977 1978 /* 1979 * Do a depth first search on the flow graph, numbering the 1980 * the basic blocks, and entering them into the 'blocks' array.` 1981 */ 1982 static void 1983 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p) 1984 { 1985 int n; 1986 1987 if (p == 0 || isMarked(ic, p)) 1988 return; 1989 1990 Mark(ic, p); 1991 n = opt_state->n_blocks++; 1992 p->id = n; 1993 opt_state->blocks[n] = p; 1994 1995 number_blks_r(opt_state, ic, JT(p)); 1996 number_blks_r(opt_state, ic, JF(p)); 1997 } 1998 1999 /* 2000 * Return the number of stmts in the flowgraph reachable by 'p'. 2001 * The nodes should be unmarked before calling. 
2002 * 2003 * Note that "stmts" means "instructions", and that this includes 2004 * 2005 * side-effect statements in 'p' (slength(p->stmts)); 2006 * 2007 * statements in the true branch from 'p' (count_stmts(JT(p))); 2008 * 2009 * statements in the false branch from 'p' (count_stmts(JF(p))); 2010 * 2011 * the conditional jump itself (1); 2012 * 2013 * an extra long jump if the true branch requires it (p->longjt); 2014 * 2015 * an extra long jump if the false branch requires it (p->longjf). 2016 */ 2017 static u_int 2018 count_stmts(struct icode *ic, struct block *p) 2019 { 2020 u_int n; 2021 2022 if (p == 0 || isMarked(ic, p)) 2023 return 0; 2024 Mark(ic, p); 2025 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p)); 2026 return slength(p->stmts) + n + 1 + p->longjt + p->longjf; 2027 } 2028 2029 /* 2030 * Allocate memory. All allocation is done before optimization 2031 * is begun. A linear bound on the size of all data structures is computed 2032 * from the total number of blocks and/or statements. 2033 */ 2034 static void 2035 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic) 2036 { 2037 bpf_u_int32 *p; 2038 int i, n, max_stmts; 2039 2040 /* 2041 * First, count the blocks, so we can malloc an array to map 2042 * block number to block. Then, put the blocks into the array. 2043 */ 2044 unMarkAll(ic); 2045 n = count_blocks(ic, ic->root); 2046 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks)); 2047 if (opt_state->blocks == NULL) 2048 bpf_error(cstate, "malloc"); 2049 unMarkAll(ic); 2050 opt_state->n_blocks = 0; 2051 number_blks_r(opt_state, ic, ic->root); 2052 2053 opt_state->n_edges = 2 * opt_state->n_blocks; 2054 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges)); 2055 if (opt_state->edges == NULL) 2056 bpf_error(cstate, "malloc"); 2057 2058 /* 2059 * The number of levels is bounded by the number of nodes. 
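	 * (Every block sits on exactly one level, so at most n_blocks
	 * levels can be non-empty.)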
2060 */ 2061 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels)); 2062 if (opt_state->levels == NULL) 2063 bpf_error(cstate, "malloc"); 2064 2065 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1; 2066 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1; 2067 2068 /* XXX */ 2069 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space) 2070 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space)); 2071 if (opt_state->space == NULL) 2072 bpf_error(cstate, "malloc"); 2073 p = opt_state->space; 2074 opt_state->all_dom_sets = p; 2075 for (i = 0; i < n; ++i) { 2076 opt_state->blocks[i]->dom = p; 2077 p += opt_state->nodewords; 2078 } 2079 opt_state->all_closure_sets = p; 2080 for (i = 0; i < n; ++i) { 2081 opt_state->blocks[i]->closure = p; 2082 p += opt_state->nodewords; 2083 } 2084 opt_state->all_edge_sets = p; 2085 for (i = 0; i < n; ++i) { 2086 register struct block *b = opt_state->blocks[i]; 2087 2088 b->et.edom = p; 2089 p += opt_state->edgewords; 2090 b->ef.edom = p; 2091 p += opt_state->edgewords; 2092 b->et.id = i; 2093 opt_state->edges[i] = &b->et; 2094 b->ef.id = opt_state->n_blocks + i; 2095 opt_state->edges[opt_state->n_blocks + i] = &b->ef; 2096 b->et.pred = b; 2097 b->ef.pred = b; 2098 } 2099 max_stmts = 0; 2100 for (i = 0; i < n; ++i) 2101 max_stmts += slength(opt_state->blocks[i]->stmts) + 1; 2102 /* 2103 * We allocate at most 3 value numbers per statement, 2104 * so this is an upper bound on the number of valnodes 2105 * we'll need. 2106 */ 2107 opt_state->maxval = 3 * max_stmts; 2108 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap)); 2109 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base)); 2110 if (opt_state->vmap == NULL || opt_state->vnode_base == NULL) 2111 bpf_error(cstate, "malloc"); 2112 } 2113 2114 /* 2115 * This is only used when supporting optimizer debugging. It is 2116 * global state, so do *not* do more than one compile in parallel 2117 * and expect it to provide meaningful information. 2118 */ 2119 #ifdef BDEBUG 2120 int bids[NBIDS]; 2121 #endif 2122 2123 /* 2124 * Returns true if successful. Returns false if a branch has 2125 * an offset that is too large. If so, we have marked that 2126 * branch so that on a subsequent iteration, it will be treated 2127 * properly. 
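 *
 * (The jt/jf fields of a conditional branch are only 8 bits wide, so
 * a target 256 or more instructions away cannot be encoded directly;
 * marking the block via p->longjt/p->longjf makes the next pass leave
 * room for, and emit, an unconditional BPF_JA "long jump" through
 * which the branch can reach its real target.)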
2128 */ 2129 static int 2130 convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state, 2131 struct icode *ic, struct block *p) 2132 { 2133 struct bpf_insn *dst; 2134 struct slist *src; 2135 u_int slen; 2136 u_int off; 2137 u_int extrajmps; /* number of extra jumps inserted */ 2138 struct slist **offset = NULL; 2139 2140 if (p == 0 || isMarked(ic, p)) 2141 return (1); 2142 Mark(ic, p); 2143 2144 if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0) 2145 return (0); 2146 if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0) 2147 return (0); 2148 2149 slen = slength(p->stmts); 2150 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf); 2151 /* inflate length by any extra jumps */ 2152 2153 p->offset = (int)(dst - conv_state->fstart); 2154 2155 /* generate offset[] for convenience */ 2156 if (slen) { 2157 offset = (struct slist **)calloc(slen, sizeof(struct slist *)); 2158 if (!offset) { 2159 bpf_error(cstate, "not enough core"); 2160 /*NOTREACHED*/ 2161 } 2162 } 2163 src = p->stmts; 2164 for (off = 0; off < slen && src; off++) { 2165 #if 0 2166 printf("off=%d src=%x\n", off, src); 2167 #endif 2168 offset[off] = src; 2169 src = src->next; 2170 } 2171 2172 off = 0; 2173 for (src = p->stmts; src; src = src->next) { 2174 if (src->s.code == NOP) 2175 continue; 2176 dst->code = (u_short)src->s.code; 2177 dst->k = src->s.k; 2178 2179 /* fill block-local relative jump */ 2180 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) { 2181 #if 0 2182 if (src->s.jt || src->s.jf) { 2183 bpf_error(cstate, "illegal jmp destination"); 2184 /*NOTREACHED*/ 2185 } 2186 #endif 2187 goto filled; 2188 } 2189 if (off == slen - 2) /*???*/ 2190 goto filled; 2191 2192 { 2193 u_int i; 2194 int jt, jf; 2195 const char ljerr[] = "%s for block-local relative jump: off=%d"; 2196 2197 #if 0 2198 printf("code=%x off=%d %x %x\n", src->s.code, 2199 off, src->s.jt, src->s.jf); 2200 #endif 2201 2202 if (!src->s.jt || !src->s.jf) { 2203 bpf_error(cstate, ljerr, "no jmp destination", off); 2204 /*NOTREACHED*/ 2205 } 2206 2207 jt = jf = 0; 2208 for (i = 0; i < slen; i++) { 2209 if (offset[i] == src->s.jt) { 2210 if (jt) { 2211 bpf_error(cstate, ljerr, "multiple matches", off); 2212 /*NOTREACHED*/ 2213 } 2214 2215 if (i - off - 1 >= 256) { 2216 bpf_error(cstate, ljerr, "out-of-range jump", off); 2217 /*NOTREACHED*/ 2218 } 2219 dst->jt = (u_char)(i - off - 1); 2220 jt++; 2221 } 2222 if (offset[i] == src->s.jf) { 2223 if (jf) { 2224 bpf_error(cstate, ljerr, "multiple matches", off); 2225 /*NOTREACHED*/ 2226 } 2227 if (i - off - 1 >= 256) { 2228 bpf_error(cstate, ljerr, "out-of-range jump", off); 2229 /*NOTREACHED*/ 2230 } 2231 dst->jf = (u_char)(i - off - 1); 2232 jf++; 2233 } 2234 } 2235 if (!jt || !jf) { 2236 bpf_error(cstate, ljerr, "no destination found", off); 2237 /*NOTREACHED*/ 2238 } 2239 } 2240 filled: 2241 ++dst; 2242 ++off; 2243 } 2244 if (offset) 2245 free(offset); 2246 2247 #ifdef BDEBUG 2248 if (dst - conv_state->fstart < NBIDS) 2249 bids[dst - conv_state->fstart] = p->id + 1; 2250 #endif 2251 dst->code = (u_short)p->s.code; 2252 dst->k = p->s.k; 2253 if (JT(p)) { 2254 extrajmps = 0; 2255 off = JT(p)->offset - (p->offset + slen) - 1; 2256 if (off >= 256) { 2257 /* offset too large for branch, must add a jump */ 2258 if (p->longjt == 0) { 2259 /* mark this instruction and retry */ 2260 p->longjt++; 2261 return(0); 2262 } 2263 /* branch if T to following jump */ 2264 if (extrajmps >= 256) { 2265 bpf_error(cstate, "too many extra jumps"); 2266 /*NOTREACHED*/ 2267 } 2268 dst->jt = 

/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
struct bpf_insn *
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)
{
	u_int n;
	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	for (;;) {
		unMarkAll(ic);
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL)
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		unMarkAll(ic);
		if (convert_code_r(cstate, &conv_state, ic, root))
			break;
		free(fp);
	}

	return fp;
}
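
/*
 * Illustrative caller-side sketch (not part of libpcap): per the comment
 * above, the generated instruction array ends up owned by the caller of
 * pcap_compile(), which must release it with pcap_freecode().  The handle,
 * device, and filter string below are hypothetical.
 */
#if 0
	struct bpf_program prog;

	if (pcap_compile(handle, &prog, "ip src host 1.1.1.1", 1,
	    PCAP_NETMASK_UNKNOWN) == -1)
		return (-1);			/* compile failed */
	if (pcap_setfilter(handle, &prog) == -1) {
		pcap_freecode(&prog);		/* still ours to free */
		return (-1);
	}
	pcap_freecode(&prog);			/* libpcap keeps its own copy */
#endif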

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
		    errno, "malloc");
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}
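
/*
 * Illustrative sketch (not part of libpcap): a capture module that filters
 * in userland can hand the caller's program to install_bpf_program(), so a
 * validated private copy lives in p->fcode while the caller keeps ownership
 * of *fp.  The function name below is hypothetical.
 */
#if 0
static int
example_setfilter(pcap_t *p, struct bpf_program *fp)
{
	/* validates fp and stores a copy in p->fcode; -1 on failure */
	return (install_bpf_program(p, fp));
}
#endif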

#ifdef BDEBUG
static void
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)
{
	int icount, noffset;
	int i;

	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != VAL_UNKNOWN)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
	fprintf(out, "\"");
	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);
}

static void
dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
{
	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	if (JT(block)) {
		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
		    block->id, JT(block)->id);
		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
		    block->id, JF(block)->id);
	}
	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
}

/*
 * Output the block CFG in the graphviz/DOT language.
 * For each block, its code, the value index of each register at block
 * exit, and the jump relationships are shown.
 *
 * Example DOT output for the BPF filter `ip src host 1.1.1.1' is:
digraph BPF {
	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
	"block0":se -> "block1":n [label="T"];
	"block0":sw -> "block3":n [label="F"];
	"block1":se -> "block2":n [label="T"];
	"block1":sw -> "block3":n [label="F"];
}
 *
 * After installing graphviz from http://www.graphviz.org/, save the output
 * as bpf.dot and run `dot -Tpng -O bpf.dot' to render the graph.
 */
static void
dot_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;
	FILE *out = stdout;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	fprintf(out, "digraph BPF {\n");
	unMarkAll(ic);
	dot_dump_node(ic, ic->root, &f, out);
	unMarkAll(ic);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);
}

static void
plain_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}

static void
opt_dump(compiler_state_t *cstate, struct icode *ic)
{
	/*
	 * If the CFG, in DOT format, is requested, output it rather than
	 * the code that would be generated from that graph.
	 */
	if (pcap_print_dot_graph)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);
}
#endif