/*	$NetBSD: optimize.c,v 1.5 2013/04/06 17:29:53 christos Exp $	*/

/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for tcpdump intermediate representation.
 */
#ifndef lint
static const char rcsid[] _U_ =
    "@(#) Header: /tcpdump/master/libpcap/optimize.c,v 1.91 2008-01-02 04:16:46 guy Exp  (LBL)";
#endif

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef WIN32
#include <pcap-stdinc.h>
#else /* WIN32 */
#if HAVE_INTTYPES_H
#include <inttypes.h>
#elif HAVE_STDINT_H
#include <stdint.h>
#endif
#ifdef HAVE_SYS_BITYPES_H
#include <sys/bitypes.h>
#endif
#include <sys/types.h>
#endif /* WIN32 */

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>

#include <errno.h>

#include "pcap-int.h"

#include "gencode.h"

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
extern int dflag;
#endif

#if defined(MSDOS) && !defined(__DJGPP__)
extern int _w32_ffs (int mask);
#define ffs _w32_ffs
#endif

#if defined(WIN32) && defined (_MSC_VER)
int ffs(int mask);
#endif

/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)
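
/*
 * Editor's illustrative sketch (not part of the original source, and not
 * compiled): the constant-time "unmark everything" trick above is what every
 * traversal in this file relies on.  A pass bumps cur_mark once via
 * unMarkAll(), and a recursive walk such as the hypothetical
 * visit_blocks_r() below marks each node the first time it is seen, so
 * shared subgraphs of the DAG are visited exactly once.  count_blocks() and
 * number_blks_r() later in this file follow the same pattern.
 */
#if 0
static void
visit_blocks_r(struct block *p)
{
	if (p == 0 || isMarked(p))
		return;			/* already visited in this pass */
	Mark(p);			/* stamp with the current pass's mark */
	visit_blocks_r(JT(p));		/* true branch */
	visit_blocks_r(JF(p));		/* false branch */
}

/* A pass would start with:  unMarkAll(); visit_blocks_r(root); */
#endif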

static void opt_init(struct block *);
static void opt_cleanup(void);

static void make_marks(struct block *);
static void mark_code(struct block *);

static void intern_blocks(struct block *);

static int eq_slist(struct slist *, struct slist *);

static void find_levels_r(struct block *);

static void find_levels(struct block *);
static void find_dom(struct block *);
static void propedom(struct edge *);
static void find_edom(struct block *);
static void find_closure(struct block *);
static int atomuse(struct stmt *);
static int atomdef(struct stmt *);
static void compute_local_ud(struct block *);
static void find_ud(struct block *);
static void init_val(void);
static int F(int, int, int);
static inline void vstore(struct stmt *, int *, int, int);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static void or_pullup(struct block *);
static void and_pullup(struct block *);
static void opt_blks(struct block *, int);
static inline void link_inedge(struct edge *, struct block *);
static void find_inedges(struct block *);
static void opt_root(struct block **);
static void opt_loop(struct block *, int);
static void fold_op(struct stmt *, int, int);
static inline struct slist *this_op(struct slist *);
static void opt_not(struct block *);
static void opt_peep(struct block *);
static void opt_stmt(struct stmt *, int[], int);
static void deadstmt(struct stmt *, struct stmt *[]);
static void opt_deadstores(struct block *);
static struct block *fold_edge(struct block *, struct edge *);
static inline int eq_blk(struct block *, struct block *);
static u_int slength(struct slist *);
static int count_blocks(struct block *);
static void number_blks_r(struct block *);
static u_int count_stmts(struct block *);
static int convert_code_r(struct block *);
#ifdef BDEBUG
static void opt_dump(struct block *);
#endif

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
static int nodewords;
static int edgewords;
struct block **levels;
bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
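
/*
 * Editor's note (illustrative, not part of the original source): a uset is
 * just an array of bpf_u_int32 words treated as a bit vector.  With 32-bit
 * words, element 37 lives in word 37 / 32 == 1, at bit 37 % 32 == 5, which
 * is exactly what SET_MEMBER, SET_INSERT and SET_DELETE compute.  The
 * whole-set macros (SET_INTERSECT, SET_SUBTRACT, SET_UNION) simply walk the
 * 'n' words of both sets; find_dom() below uses SET_INTERSECT this way to
 * intersect dominator sets word by word.  The names in the sketch below are
 * hypothetical.
 */
#if 0
/* Hypothetical two-word set, enough for 64 elements. */
static bpf_u_int32 example_set[2];

static void
uset_example(void)
{
	SET_INSERT(example_set, 37);		/* sets bit 5 of word 1 */
	if (SET_MEMBER(example_set, 37))
		SET_DELETE(example_set, 37);	/* clears it again */
}
#endif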

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(b)
	struct block *b;
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(root)
	struct block *root;
{
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(root)
	struct block *root;
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}

static void
propedom(ep)
	struct edge *ep;
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(root)
	struct block *root;
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.
These sets 349 * are backwards in the sense that we find the set of nodes that reach 350 * a given node, not the set of nodes that can be reached by a node. 351 * 352 * Assumes graph has been leveled. 353 */ 354 static void 355 find_closure(root) 356 struct block *root; 357 { 358 int i; 359 struct block *b; 360 361 /* 362 * Initialize sets to contain no nodes. 363 */ 364 memset((char *)all_closure_sets, 0, 365 n_blocks * nodewords * sizeof(*all_closure_sets)); 366 367 /* root->level is the highest level no found. */ 368 for (i = root->level; i >= 0; --i) { 369 for (b = levels[i]; b; b = b->link) { 370 SET_INSERT(b->closure, b->id); 371 if (JT(b) == 0) 372 continue; 373 SET_UNION(JT(b)->closure, b->closure, nodewords); 374 SET_UNION(JF(b)->closure, b->closure, nodewords); 375 } 376 } 377 } 378 379 /* 380 * Return the register number that is used by s. If A and X are both 381 * used, return AX_ATOM. If no register is used, return -1. 382 * 383 * The implementation should probably change to an array access. 384 */ 385 static int 386 atomuse(s) 387 struct stmt *s; 388 { 389 register int c = s->code; 390 391 if (c == NOP) 392 return -1; 393 394 switch (BPF_CLASS(c)) { 395 396 case BPF_RET: 397 return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 398 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 399 400 case BPF_LD: 401 case BPF_LDX: 402 return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 403 (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 404 405 case BPF_ST: 406 return A_ATOM; 407 408 case BPF_STX: 409 return X_ATOM; 410 411 case BPF_JMP: 412 case BPF_ALU: 413 if (BPF_SRC(c) == BPF_X) 414 return AX_ATOM; 415 return A_ATOM; 416 417 case BPF_MISC: 418 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 419 } 420 abort(); 421 /* NOTREACHED */ 422 } 423 424 /* 425 * Return the register number that is defined by 's'. We assume that 426 * a single stmt cannot define more than one register. If no register 427 * is defined, return -1. 428 * 429 * The implementation should probably change to an array access. 430 */ 431 static int 432 atomdef(s) 433 struct stmt *s; 434 { 435 if (s->code == NOP) 436 return -1; 437 438 switch (BPF_CLASS(s->code)) { 439 440 case BPF_LD: 441 case BPF_ALU: 442 return A_ATOM; 443 444 case BPF_LDX: 445 return X_ATOM; 446 447 case BPF_ST: 448 case BPF_STX: 449 return s->k; 450 451 case BPF_MISC: 452 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 453 } 454 return -1; 455 } 456 457 /* 458 * Compute the sets of registers used, defined, and killed by 'b'. 459 * 460 * "Used" means that a statement in 'b' uses the register before any 461 * statement in 'b' defines it, i.e. it uses the value left in 462 * that register by a predecessor block of this block. 463 * "Defined" means that a statement in 'b' defines it. 464 * "Killed" means that a statement in 'b' defines it before any 465 * statement in 'b' uses it, i.e. it kills the value left in that 466 * register by a predecessor block of this block. 
 */
static void
compute_local_ud(b)
	struct block *b;
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
	}

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}

/*
 * Assume graph is already leveled.
 */
static void
find_ud(root)
	struct block *root;
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val()
{
	curval = 0;
	next_vnode = vnode_base;
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
	memset((char *)hashtbl, 0, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy.
*/ 596 static int 597 F(code, v0, v1) 598 int code; 599 int v0, v1; 600 { 601 u_int hash; 602 int val; 603 struct valnode *p; 604 605 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 606 hash %= MODULUS; 607 608 for (p = hashtbl[hash]; p; p = p->next) 609 if (p->code == code && p->v0 == v0 && p->v1 == v1) 610 return p->val; 611 612 val = ++curval; 613 if (BPF_MODE(code) == BPF_IMM && 614 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 615 vmap[val].const_val = v0; 616 vmap[val].is_const = 1; 617 } 618 p = next_vnode++; 619 p->val = val; 620 p->code = code; 621 p->v0 = v0; 622 p->v1 = v1; 623 p->next = hashtbl[hash]; 624 hashtbl[hash] = p; 625 626 return val; 627 } 628 629 static inline void 630 vstore(s, valp, newval, alter) 631 struct stmt *s; 632 int *valp; 633 int newval; 634 int alter; 635 { 636 if (alter && *valp == newval) 637 s->code = NOP; 638 else 639 *valp = newval; 640 } 641 642 static void 643 fold_op(s, v0, v1) 644 struct stmt *s; 645 int v0, v1; 646 { 647 bpf_u_int32 a, b; 648 649 a = vmap[v0].const_val; 650 b = vmap[v1].const_val; 651 652 switch (BPF_OP(s->code)) { 653 case BPF_ADD: 654 a += b; 655 break; 656 657 case BPF_SUB: 658 a -= b; 659 break; 660 661 case BPF_MUL: 662 a *= b; 663 break; 664 665 case BPF_DIV: 666 if (b == 0) 667 bpf_error("division by zero"); 668 a /= b; 669 break; 670 671 case BPF_AND: 672 a &= b; 673 break; 674 675 case BPF_OR: 676 a |= b; 677 break; 678 679 case BPF_LSH: 680 a <<= b; 681 break; 682 683 case BPF_RSH: 684 a >>= b; 685 break; 686 687 case BPF_NEG: 688 a = -a; 689 break; 690 691 default: 692 abort(); 693 } 694 s->k = a; 695 s->code = BPF_LD|BPF_IMM; 696 done = 0; 697 } 698 699 static inline struct slist * 700 this_op(s) 701 struct slist *s; 702 { 703 while (s != 0 && s->s.code == NOP) 704 s = s->next; 705 return s; 706 } 707 708 static void 709 opt_not(b) 710 struct block *b; 711 { 712 struct block *tmp = JT(b); 713 714 JT(b) = JF(b); 715 JF(b) = tmp; 716 } 717 718 static void 719 opt_peep(b) 720 struct block *b; 721 { 722 struct slist *s; 723 struct slist *next, *last; 724 int val; 725 726 s = b->stmts; 727 if (s == 0) 728 return; 729 730 last = s; 731 for (/*empty*/; /*empty*/; s = next) { 732 /* 733 * Skip over nops. 734 */ 735 s = this_op(s); 736 if (s == 0) 737 break; /* nothing left in the block */ 738 739 /* 740 * Find the next real instruction after that one 741 * (skipping nops). 742 */ 743 next = this_op(s->next); 744 if (next == 0) 745 break; /* no next instruction */ 746 last = next; 747 748 /* 749 * st M[k] --> st M[k] 750 * ldx M[k] tax 751 */ 752 if (s->s.code == BPF_ST && 753 next->s.code == (BPF_LDX|BPF_MEM) && 754 s->s.k == next->s.k) { 755 done = 0; 756 next->s.code = BPF_MISC|BPF_TAX; 757 } 758 /* 759 * ld #k --> ldx #k 760 * tax txa 761 */ 762 if (s->s.code == (BPF_LD|BPF_IMM) && 763 next->s.code == (BPF_MISC|BPF_TAX)) { 764 s->s.code = BPF_LDX|BPF_IMM; 765 next->s.code = BPF_MISC|BPF_TXA; 766 done = 0; 767 } 768 /* 769 * This is an ugly special case, but it happens 770 * when you say tcp[k] or udp[k] where k is a constant. 771 */ 772 if (s->s.code == (BPF_LD|BPF_IMM)) { 773 struct slist *add, *tax, *ild; 774 775 /* 776 * Check that X isn't used on exit from this 777 * block (which the optimizer might cause). 778 * We know the code generator won't generate 779 * any local dependencies. 
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
860 */ 861 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { 862 val = b->val[X_ATOM]; 863 if (vmap[val].is_const) { 864 /* 865 * If we have a subtract to do a comparison, 866 * and the X register is a known constant, 867 * we can merge this value into the 868 * comparison: 869 * 870 * sub x -> nop 871 * jeq #y jeq #(x+y) 872 */ 873 b->s.k += vmap[val].const_val; 874 last->s.code = NOP; 875 done = 0; 876 } else if (b->s.k == 0) { 877 /* 878 * If the X register isn't a constant, 879 * and the comparison in the test is 880 * against 0, we can compare with the 881 * X register, instead: 882 * 883 * sub x -> nop 884 * jeq #0 jeq x 885 */ 886 last->s.code = NOP; 887 b->s.code = BPF_JMP|BPF_JEQ|BPF_X; 888 done = 0; 889 } 890 } 891 /* 892 * Likewise, a constant subtract can be simplified: 893 * 894 * sub #x -> nop 895 * jeq #y -> jeq #(x+y) 896 */ 897 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { 898 last->s.code = NOP; 899 b->s.k += last->s.k; 900 done = 0; 901 } 902 /* 903 * And, similarly, a constant AND can be simplified 904 * if we're testing against 0, i.e.: 905 * 906 * and #k nop 907 * jeq #0 -> jset #k 908 */ 909 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 910 b->s.k == 0) { 911 b->s.k = last->s.k; 912 b->s.code = BPF_JMP|BPF_K|BPF_JSET; 913 last->s.code = NOP; 914 done = 0; 915 opt_not(b); 916 } 917 } 918 /* 919 * jset #0 -> never 920 * jset #ffffffff -> always 921 */ 922 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 923 if (b->s.k == 0) 924 JT(b) = JF(b); 925 if (b->s.k == (int)0xffffffff) 926 JF(b) = JT(b); 927 } 928 /* 929 * If we're comparing against the index register, and the index 930 * register is a known constant, we can just compare against that 931 * constant. 932 */ 933 val = b->val[X_ATOM]; 934 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { 935 bpf_int32 v = vmap[val].const_val; 936 b->s.code &= ~BPF_X; 937 b->s.k = v; 938 } 939 /* 940 * If the accumulator is a known constant, we can compute the 941 * comparison result. 942 */ 943 val = b->val[A_ATOM]; 944 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 945 bpf_int32 v = vmap[val].const_val; 946 switch (BPF_OP(b->s.code)) { 947 948 case BPF_JEQ: 949 v = v == b->s.k; 950 break; 951 952 case BPF_JGT: 953 v = (unsigned)v > (unsigned)b->s.k; 954 break; 955 956 case BPF_JGE: 957 v = (unsigned)v >= (unsigned)b->s.k; 958 break; 959 960 case BPF_JSET: 961 v &= b->s.k; 962 break; 963 964 default: 965 abort(); 966 } 967 if (JF(b) != JT(b)) 968 done = 0; 969 if (v) 970 JF(b) = JT(b); 971 else 972 JT(b) = JF(b); 973 } 974 } 975 976 /* 977 * Compute the symbolic value of expression of 's', and update 978 * anything it defines in the value table 'val'. If 'alter' is true, 979 * do various optimizations. This code would be cleaner if symbolic 980 * evaluation and code transformations weren't folded together. 
981 */ 982 static void 983 opt_stmt(s, val, alter) 984 struct stmt *s; 985 int val[]; 986 int alter; 987 { 988 int op; 989 int v; 990 991 switch (s->code) { 992 993 case BPF_LD|BPF_ABS|BPF_W: 994 case BPF_LD|BPF_ABS|BPF_H: 995 case BPF_LD|BPF_ABS|BPF_B: 996 v = F(s->code, s->k, 0L); 997 vstore(s, &val[A_ATOM], v, alter); 998 break; 999 1000 case BPF_LD|BPF_IND|BPF_W: 1001 case BPF_LD|BPF_IND|BPF_H: 1002 case BPF_LD|BPF_IND|BPF_B: 1003 v = val[X_ATOM]; 1004 if (alter && vmap[v].is_const) { 1005 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 1006 s->k += vmap[v].const_val; 1007 v = F(s->code, s->k, 0L); 1008 done = 0; 1009 } 1010 else 1011 v = F(s->code, s->k, v); 1012 vstore(s, &val[A_ATOM], v, alter); 1013 break; 1014 1015 case BPF_LD|BPF_LEN: 1016 v = F(s->code, 0L, 0L); 1017 vstore(s, &val[A_ATOM], v, alter); 1018 break; 1019 1020 case BPF_LD|BPF_IMM: 1021 v = K(s->k); 1022 vstore(s, &val[A_ATOM], v, alter); 1023 break; 1024 1025 case BPF_LDX|BPF_IMM: 1026 v = K(s->k); 1027 vstore(s, &val[X_ATOM], v, alter); 1028 break; 1029 1030 case BPF_LDX|BPF_MSH|BPF_B: 1031 v = F(s->code, s->k, 0L); 1032 vstore(s, &val[X_ATOM], v, alter); 1033 break; 1034 1035 case BPF_ALU|BPF_NEG: 1036 if (alter && vmap[val[A_ATOM]].is_const) { 1037 s->code = BPF_LD|BPF_IMM; 1038 s->k = -vmap[val[A_ATOM]].const_val; 1039 val[A_ATOM] = K(s->k); 1040 } 1041 else 1042 val[A_ATOM] = F(s->code, val[A_ATOM], 0L); 1043 break; 1044 1045 case BPF_ALU|BPF_ADD|BPF_K: 1046 case BPF_ALU|BPF_SUB|BPF_K: 1047 case BPF_ALU|BPF_MUL|BPF_K: 1048 case BPF_ALU|BPF_DIV|BPF_K: 1049 case BPF_ALU|BPF_AND|BPF_K: 1050 case BPF_ALU|BPF_OR|BPF_K: 1051 case BPF_ALU|BPF_LSH|BPF_K: 1052 case BPF_ALU|BPF_RSH|BPF_K: 1053 op = BPF_OP(s->code); 1054 if (alter) { 1055 if (s->k == 0) { 1056 /* don't optimize away "sub #0" 1057 * as it may be needed later to 1058 * fixup the generated math code */ 1059 if (op == BPF_ADD || 1060 op == BPF_LSH || op == BPF_RSH || 1061 op == BPF_OR) { 1062 s->code = NOP; 1063 break; 1064 } 1065 if (op == BPF_MUL || op == BPF_AND) { 1066 s->code = BPF_LD|BPF_IMM; 1067 val[A_ATOM] = K(s->k); 1068 break; 1069 } 1070 } 1071 if (vmap[val[A_ATOM]].is_const) { 1072 fold_op(s, val[A_ATOM], K(s->k)); 1073 val[A_ATOM] = K(s->k); 1074 break; 1075 } 1076 } 1077 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); 1078 break; 1079 1080 case BPF_ALU|BPF_ADD|BPF_X: 1081 case BPF_ALU|BPF_SUB|BPF_X: 1082 case BPF_ALU|BPF_MUL|BPF_X: 1083 case BPF_ALU|BPF_DIV|BPF_X: 1084 case BPF_ALU|BPF_AND|BPF_X: 1085 case BPF_ALU|BPF_OR|BPF_X: 1086 case BPF_ALU|BPF_LSH|BPF_X: 1087 case BPF_ALU|BPF_RSH|BPF_X: 1088 op = BPF_OP(s->code); 1089 if (alter && vmap[val[X_ATOM]].is_const) { 1090 if (vmap[val[A_ATOM]].is_const) { 1091 fold_op(s, val[A_ATOM], val[X_ATOM]); 1092 val[A_ATOM] = K(s->k); 1093 } 1094 else { 1095 s->code = BPF_ALU|BPF_K|op; 1096 s->k = vmap[val[X_ATOM]].const_val; 1097 done = 0; 1098 val[A_ATOM] = 1099 F(s->code, val[A_ATOM], K(s->k)); 1100 } 1101 break; 1102 } 1103 /* 1104 * Check if we're doing something to an accumulator 1105 * that is 0, and simplify. This may not seem like 1106 * much of a simplification but it could open up further 1107 * optimizations. 1108 * XXX We could also check for mul by 1, etc. 
1109 */ 1110 if (alter && vmap[val[A_ATOM]].is_const 1111 && vmap[val[A_ATOM]].const_val == 0) { 1112 if (op == BPF_ADD || op == BPF_OR) { 1113 s->code = BPF_MISC|BPF_TXA; 1114 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1115 break; 1116 } 1117 else if (op == BPF_MUL || op == BPF_DIV || 1118 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { 1119 s->code = BPF_LD|BPF_IMM; 1120 s->k = 0; 1121 vstore(s, &val[A_ATOM], K(s->k), alter); 1122 break; 1123 } 1124 else if (op == BPF_NEG) { 1125 s->code = NOP; 1126 break; 1127 } 1128 } 1129 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); 1130 break; 1131 1132 case BPF_MISC|BPF_TXA: 1133 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1134 break; 1135 1136 case BPF_LD|BPF_MEM: 1137 v = val[s->k]; 1138 if (alter && vmap[v].is_const) { 1139 s->code = BPF_LD|BPF_IMM; 1140 s->k = vmap[v].const_val; 1141 done = 0; 1142 } 1143 vstore(s, &val[A_ATOM], v, alter); 1144 break; 1145 1146 case BPF_MISC|BPF_TAX: 1147 vstore(s, &val[X_ATOM], val[A_ATOM], alter); 1148 break; 1149 1150 case BPF_LDX|BPF_MEM: 1151 v = val[s->k]; 1152 if (alter && vmap[v].is_const) { 1153 s->code = BPF_LDX|BPF_IMM; 1154 s->k = vmap[v].const_val; 1155 done = 0; 1156 } 1157 vstore(s, &val[X_ATOM], v, alter); 1158 break; 1159 1160 case BPF_ST: 1161 vstore(s, &val[s->k], val[A_ATOM], alter); 1162 break; 1163 1164 case BPF_STX: 1165 vstore(s, &val[s->k], val[X_ATOM], alter); 1166 break; 1167 } 1168 } 1169 1170 static void 1171 deadstmt(s, last) 1172 register struct stmt *s; 1173 register struct stmt *last[]; 1174 { 1175 register int atom; 1176 1177 atom = atomuse(s); 1178 if (atom >= 0) { 1179 if (atom == AX_ATOM) { 1180 last[X_ATOM] = 0; 1181 last[A_ATOM] = 0; 1182 } 1183 else 1184 last[atom] = 0; 1185 } 1186 atom = atomdef(s); 1187 if (atom >= 0) { 1188 if (last[atom]) { 1189 done = 0; 1190 last[atom]->code = NOP; 1191 } 1192 last[atom] = s; 1193 } 1194 } 1195 1196 static void 1197 opt_deadstores(b) 1198 register struct block *b; 1199 { 1200 register struct slist *s; 1201 register int atom; 1202 struct stmt *last[N_ATOMS]; 1203 1204 memset((char *)last, 0, sizeof last); 1205 1206 for (s = b->stmts; s != 0; s = s->next) 1207 deadstmt(&s->s, last); 1208 deadstmt(&b->s, last); 1209 1210 for (atom = 0; atom < N_ATOMS; ++atom) 1211 if (last[atom] && !ATOMELEM(b->out_use, atom)) { 1212 last[atom]->code = NOP; 1213 done = 0; 1214 } 1215 } 1216 1217 static void 1218 opt_blk(b, do_stmts) 1219 struct block *b; 1220 int do_stmts; 1221 { 1222 struct slist *s; 1223 struct edge *p; 1224 int i; 1225 bpf_int32 aval, xval; 1226 1227 #if 0 1228 for (s = b->stmts; s && s->next; s = s->next) 1229 if (BPF_CLASS(s->s.code) == BPF_JMP) { 1230 do_stmts = 0; 1231 break; 1232 } 1233 #endif 1234 1235 /* 1236 * Initialize the atom values. 1237 */ 1238 p = b->in_edges; 1239 if (p == 0) { 1240 /* 1241 * We have no predecessors, so everything is undefined 1242 * upon entry to this block. 1243 */ 1244 memset((char *)b->val, 0, sizeof(b->val)); 1245 } else { 1246 /* 1247 * Inherit values from our predecessors. 1248 * 1249 * First, get the values from the predecessor along the 1250 * first edge leading to this node. 1251 */ 1252 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 1253 /* 1254 * Now look at all the other nodes leading to this node. 
1255 * If, for the predecessor along that edge, a register 1256 * has a different value from the one we have (i.e., 1257 * control paths are merging, and the merging paths 1258 * assign different values to that register), give the 1259 * register the undefined value of 0. 1260 */ 1261 while ((p = p->next) != NULL) { 1262 for (i = 0; i < N_ATOMS; ++i) 1263 if (b->val[i] != p->pred->val[i]) 1264 b->val[i] = 0; 1265 } 1266 } 1267 aval = b->val[A_ATOM]; 1268 xval = b->val[X_ATOM]; 1269 for (s = b->stmts; s; s = s->next) 1270 opt_stmt(&s->s, b->val, do_stmts); 1271 1272 /* 1273 * This is a special case: if we don't use anything from this 1274 * block, and we load the accumulator or index register with a 1275 * value that is already there, or if this block is a return, 1276 * eliminate all the statements. 1277 * 1278 * XXX - what if it does a store? 1279 * 1280 * XXX - why does it matter whether we use anything from this 1281 * block? If the accumulator or index register doesn't change 1282 * its value, isn't that OK even if we use that value? 1283 * 1284 * XXX - if we load the accumulator with a different value, 1285 * and the block ends with a conditional branch, we obviously 1286 * can't eliminate it, as the branch depends on that value. 1287 * For the index register, the conditional branch only depends 1288 * on the index register value if the test is against the index 1289 * register value rather than a constant; if nothing uses the 1290 * value we put into the index register, and we're not testing 1291 * against the index register's value, and there aren't any 1292 * other problems that would keep us from eliminating this 1293 * block, can we eliminate it? 1294 */ 1295 if (do_stmts && 1296 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval && 1297 xval != 0 && b->val[X_ATOM] == xval) || 1298 BPF_CLASS(b->s.code) == BPF_RET)) { 1299 if (b->stmts != 0) { 1300 b->stmts = 0; 1301 done = 0; 1302 } 1303 } else { 1304 opt_peep(b); 1305 opt_deadstores(b); 1306 } 1307 /* 1308 * Set up values for branch optimizer. 1309 */ 1310 if (BPF_SRC(b->s.code) == BPF_K) 1311 b->oval = K(b->s.k); 1312 else 1313 b->oval = b->val[X_ATOM]; 1314 b->et.code = b->s.code; 1315 b->ef.code = -b->s.code; 1316 } 1317 1318 /* 1319 * Return true if any register that is used on exit from 'succ', has 1320 * an exit value that is different from the corresponding exit value 1321 * from 'b'. 1322 */ 1323 static int 1324 use_conflict(b, succ) 1325 struct block *b, *succ; 1326 { 1327 int atom; 1328 atomset use = succ->out_use; 1329 1330 if (use == 0) 1331 return 0; 1332 1333 for (atom = 0; atom < N_ATOMS; ++atom) 1334 if (ATOMELEM(use, atom)) 1335 if (b->val[atom] != succ->val[atom]) 1336 return 1; 1337 return 0; 1338 } 1339 1340 static struct block * 1341 fold_edge(child, ep) 1342 struct block *child; 1343 struct edge *ep; 1344 { 1345 int sense; 1346 int aval0, aval1, oval0, oval1; 1347 int code = ep->code; 1348 1349 if (code < 0) { 1350 code = -code; 1351 sense = 0; 1352 } else 1353 sense = 1; 1354 1355 if (child->s.code != code) 1356 return 0; 1357 1358 aval0 = child->val[A_ATOM]; 1359 oval0 = child->oval; 1360 aval1 = ep->pred->val[A_ATOM]; 1361 oval1 = ep->pred->oval; 1362 1363 if (aval0 != aval1) 1364 return 0; 1365 1366 if (oval0 == oval1) 1367 /* 1368 * The operands of the branch instructions are 1369 * identical, so the result is true if a true 1370 * branch was taken to get here, otherwise false. 1371 */ 1372 return sense ? 
		    JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}

static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node.
*/ 1520 pull = *samep; 1521 *samep = JF(pull); 1522 JF(pull) = *diffp; 1523 1524 /* 1525 * At the top of the chain, each predecessor needs to point at the 1526 * pulled up node. Inside the chain, there is only one predecessor 1527 * to worry about. 1528 */ 1529 if (at_top) { 1530 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1531 if (JT(ep->pred) == b) 1532 JT(ep->pred) = pull; 1533 else 1534 JF(ep->pred) = pull; 1535 } 1536 } 1537 else 1538 *diffp = pull; 1539 1540 done = 0; 1541 } 1542 1543 static void 1544 and_pullup(b) 1545 struct block *b; 1546 { 1547 int val, at_top; 1548 struct block *pull; 1549 struct block **diffp, **samep; 1550 struct edge *ep; 1551 1552 ep = b->in_edges; 1553 if (ep == 0) 1554 return; 1555 1556 /* 1557 * Make sure each predecessor loads the same value. 1558 */ 1559 val = ep->pred->val[A_ATOM]; 1560 for (ep = ep->next; ep != 0; ep = ep->next) 1561 if (val != ep->pred->val[A_ATOM]) 1562 return; 1563 1564 if (JT(b->in_edges->pred) == b) 1565 diffp = &JT(b->in_edges->pred); 1566 else 1567 diffp = &JF(b->in_edges->pred); 1568 1569 at_top = 1; 1570 while (1) { 1571 if (*diffp == 0) 1572 return; 1573 1574 if (JF(*diffp) != JF(b)) 1575 return; 1576 1577 if (!SET_MEMBER((*diffp)->dom, b->id)) 1578 return; 1579 1580 if ((*diffp)->val[A_ATOM] != val) 1581 break; 1582 1583 diffp = &JT(*diffp); 1584 at_top = 0; 1585 } 1586 samep = &JT(*diffp); 1587 while (1) { 1588 if (*samep == 0) 1589 return; 1590 1591 if (JF(*samep) != JF(b)) 1592 return; 1593 1594 if (!SET_MEMBER((*samep)->dom, b->id)) 1595 return; 1596 1597 if ((*samep)->val[A_ATOM] == val) 1598 break; 1599 1600 /* XXX Need to check that there are no data dependencies 1601 between diffp and samep. Currently, the code generator 1602 will not produce such dependencies. */ 1603 samep = &JT(*samep); 1604 } 1605 #ifdef notdef 1606 /* XXX This doesn't cover everything. */ 1607 for (i = 0; i < N_ATOMS; ++i) 1608 if ((*samep)->val[i] != pred->val[i]) 1609 return; 1610 #endif 1611 /* Pull up the node. */ 1612 pull = *samep; 1613 *samep = JT(pull); 1614 JT(pull) = *diffp; 1615 1616 /* 1617 * At the top of the chain, each predecessor needs to point at the 1618 * pulled up node. Inside the chain, there is only one predecessor 1619 * to worry about. 1620 */ 1621 if (at_top) { 1622 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1623 if (JT(ep->pred) == b) 1624 JT(ep->pred) = pull; 1625 else 1626 JF(ep->pred) = pull; 1627 } 1628 } 1629 else 1630 *diffp = pull; 1631 1632 done = 0; 1633 } 1634 1635 static void 1636 opt_blks(root, do_stmts) 1637 struct block *root; 1638 int do_stmts; 1639 { 1640 int i, maxlevel; 1641 struct block *p; 1642 1643 init_val(); 1644 maxlevel = root->level; 1645 1646 find_inedges(root); 1647 for (i = maxlevel; i >= 0; --i) 1648 for (p = levels[i]; p; p = p->link) 1649 opt_blk(p, do_stmts); 1650 1651 if (do_stmts) 1652 /* 1653 * No point trying to move branches; it can't possibly 1654 * make a difference at this point. 
1655 */ 1656 return; 1657 1658 for (i = 1; i <= maxlevel; ++i) { 1659 for (p = levels[i]; p; p = p->link) { 1660 opt_j(&p->et); 1661 opt_j(&p->ef); 1662 } 1663 } 1664 1665 find_inedges(root); 1666 for (i = 1; i <= maxlevel; ++i) { 1667 for (p = levels[i]; p; p = p->link) { 1668 or_pullup(p); 1669 and_pullup(p); 1670 } 1671 } 1672 } 1673 1674 static inline void 1675 link_inedge(parent, child) 1676 struct edge *parent; 1677 struct block *child; 1678 { 1679 parent->next = child->in_edges; 1680 child->in_edges = parent; 1681 } 1682 1683 static void 1684 find_inedges(root) 1685 struct block *root; 1686 { 1687 int i; 1688 struct block *b; 1689 1690 for (i = 0; i < n_blocks; ++i) 1691 blocks[i]->in_edges = 0; 1692 1693 /* 1694 * Traverse the graph, adding each edge to the predecessor 1695 * list of its successors. Skip the leaves (i.e. level 0). 1696 */ 1697 for (i = root->level; i > 0; --i) { 1698 for (b = levels[i]; b != 0; b = b->link) { 1699 link_inedge(&b->et, JT(b)); 1700 link_inedge(&b->ef, JF(b)); 1701 } 1702 } 1703 } 1704 1705 static void 1706 opt_root(b) 1707 struct block **b; 1708 { 1709 struct slist *tmp, *s; 1710 1711 s = (*b)->stmts; 1712 (*b)->stmts = 0; 1713 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b)) 1714 *b = JT(*b); 1715 1716 tmp = (*b)->stmts; 1717 if (tmp != 0) 1718 sappend(s, tmp); 1719 (*b)->stmts = s; 1720 1721 /* 1722 * If the root node is a return, then there is no 1723 * point executing any statements (since the bpf machine 1724 * has no side effects). 1725 */ 1726 if (BPF_CLASS((*b)->s.code) == BPF_RET) 1727 (*b)->stmts = 0; 1728 } 1729 1730 static void 1731 opt_loop(root, do_stmts) 1732 struct block *root; 1733 int do_stmts; 1734 { 1735 1736 #ifdef BDEBUG 1737 if (dflag > 1) { 1738 printf("opt_loop(root, %d) begin\n", do_stmts); 1739 opt_dump(root); 1740 } 1741 #endif 1742 do { 1743 done = 1; 1744 find_levels(root); 1745 find_dom(root); 1746 find_closure(root); 1747 find_ud(root); 1748 find_edom(root); 1749 opt_blks(root, do_stmts); 1750 #ifdef BDEBUG 1751 if (dflag > 1) { 1752 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done); 1753 opt_dump(root); 1754 } 1755 #endif 1756 } while (!done); 1757 } 1758 1759 /* 1760 * Optimize the filter code in its dag representation. 1761 */ 1762 void 1763 bpf_optimize(rootp) 1764 struct block **rootp; 1765 { 1766 struct block *root; 1767 1768 root = *rootp; 1769 1770 opt_init(root); 1771 opt_loop(root, 0); 1772 opt_loop(root, 1); 1773 intern_blocks(root); 1774 #ifdef BDEBUG 1775 if (dflag > 1) { 1776 printf("after intern_blocks()\n"); 1777 opt_dump(root); 1778 } 1779 #endif 1780 opt_root(rootp); 1781 #ifdef BDEBUG 1782 if (dflag > 1) { 1783 printf("after opt_root()\n"); 1784 opt_dump(root); 1785 } 1786 #endif 1787 opt_cleanup(); 1788 } 1789 1790 static void 1791 make_marks(p) 1792 struct block *p; 1793 { 1794 if (!isMarked(p)) { 1795 Mark(p); 1796 if (BPF_CLASS(p->s.code) != BPF_RET) { 1797 make_marks(JT(p)); 1798 make_marks(JF(p)); 1799 } 1800 } 1801 } 1802 1803 /* 1804 * Mark code array such that isMarked(i) is true 1805 * only for nodes that are alive. 1806 */ 1807 static void 1808 mark_code(p) 1809 struct block *p; 1810 { 1811 cur_mark += 1; 1812 make_marks(p); 1813 } 1814 1815 /* 1816 * True iff the two stmt lists load the same value from the packet into 1817 * the accumulator. 
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

static inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}

static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static u_int
slength(s)
	struct slist *s;
{
	u_int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(p)
	struct block *p;
{
	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return;

	Mark(p);
	n = n_blocks++;
	p->id = n;
	blocks[n] = p;

	number_blks_r(JT(p));
	number_blks_r(JF(p));
}

/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 * side-effect statements in 'p' (slength(p->stmts));
 *
 * statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 * statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 * the conditional jump itself (1);
 *
 * an extra long jump if the true branch requires it (p->longjt);
 *
 * an extra long jump if the false branch requires it (p->longjf).
1974 */ 1975 static u_int 1976 count_stmts(p) 1977 struct block *p; 1978 { 1979 u_int n; 1980 1981 if (p == 0 || isMarked(p)) 1982 return 0; 1983 Mark(p); 1984 n = count_stmts(JT(p)) + count_stmts(JF(p)); 1985 return slength(p->stmts) + n + 1 + p->longjt + p->longjf; 1986 } 1987 1988 /* 1989 * Allocate memory. All allocation is done before optimization 1990 * is begun. A linear bound on the size of all data structures is computed 1991 * from the total number of blocks and/or statements. 1992 */ 1993 static void 1994 opt_init(root) 1995 struct block *root; 1996 { 1997 bpf_u_int32 *p; 1998 int i, n, max_stmts; 1999 2000 /* 2001 * First, count the blocks, so we can malloc an array to map 2002 * block number to block. Then, put the blocks into the array. 2003 */ 2004 unMarkAll(); 2005 n = count_blocks(root); 2006 blocks = (struct block **)calloc(n, sizeof(*blocks)); 2007 if (blocks == NULL) 2008 bpf_error("malloc"); 2009 unMarkAll(); 2010 n_blocks = 0; 2011 number_blks_r(root); 2012 2013 n_edges = 2 * n_blocks; 2014 edges = (struct edge **)calloc(n_edges, sizeof(*edges)); 2015 if (edges == NULL) 2016 bpf_error("malloc"); 2017 2018 /* 2019 * The number of levels is bounded by the number of nodes. 2020 */ 2021 levels = (struct block **)calloc(n_blocks, sizeof(*levels)); 2022 if (levels == NULL) 2023 bpf_error("malloc"); 2024 2025 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1; 2026 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1; 2027 2028 /* XXX */ 2029 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space) 2030 + n_edges * edgewords * sizeof(*space)); 2031 if (space == NULL) 2032 bpf_error("malloc"); 2033 p = space; 2034 all_dom_sets = p; 2035 for (i = 0; i < n; ++i) { 2036 blocks[i]->dom = p; 2037 p += nodewords; 2038 } 2039 all_closure_sets = p; 2040 for (i = 0; i < n; ++i) { 2041 blocks[i]->closure = p; 2042 p += nodewords; 2043 } 2044 all_edge_sets = p; 2045 for (i = 0; i < n; ++i) { 2046 register struct block *b = blocks[i]; 2047 2048 b->et.edom = p; 2049 p += edgewords; 2050 b->ef.edom = p; 2051 p += edgewords; 2052 b->et.id = i; 2053 edges[i] = &b->et; 2054 b->ef.id = n_blocks + i; 2055 edges[n_blocks + i] = &b->ef; 2056 b->et.pred = b; 2057 b->ef.pred = b; 2058 } 2059 max_stmts = 0; 2060 for (i = 0; i < n; ++i) 2061 max_stmts += slength(blocks[i]->stmts) + 1; 2062 /* 2063 * We allocate at most 3 value numbers per statement, 2064 * so this is an upper bound on the number of valnodes 2065 * we'll need. 2066 */ 2067 maxval = 3 * max_stmts; 2068 vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap)); 2069 vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base)); 2070 if (vmap == NULL || vnode_base == NULL) 2071 bpf_error("malloc"); 2072 } 2073 2074 /* 2075 * Some pointers used to convert the basic block form of the code, 2076 * into the array form that BPF requires. 'fstart' will point to 2077 * the malloc'd array while 'ftail' is used during the recursive traversal. 2078 */ 2079 static struct bpf_insn *fstart; 2080 static struct bpf_insn *ftail; 2081 2082 #ifdef BDEBUG 2083 int bids[1000]; 2084 #endif 2085 2086 /* 2087 * Returns true if successful. Returns false if a branch has 2088 * an offset that is too large. If so, we have marked that 2089 * branch so that on a subsequent iteration, it will be treated 2090 * properly. 
2091 */ 2092 static int 2093 convert_code_r(p) 2094 struct block *p; 2095 { 2096 struct bpf_insn *dst; 2097 struct slist *src; 2098 u_int slen; 2099 u_int off; 2100 int extrajmps; /* number of extra jumps inserted */ 2101 struct slist **offset = NULL; 2102 2103 if (p == 0 || isMarked(p)) 2104 return (1); 2105 Mark(p); 2106 2107 if (convert_code_r(JF(p)) == 0) 2108 return (0); 2109 if (convert_code_r(JT(p)) == 0) 2110 return (0); 2111 2112 slen = slength(p->stmts); 2113 dst = ftail -= (slen + 1 + p->longjt + p->longjf); 2114 /* inflate length by any extra jumps */ 2115 2116 p->offset = dst - fstart; 2117 2118 /* generate offset[] for convenience */ 2119 if (slen) { 2120 offset = (struct slist **)calloc(slen, sizeof(struct slist *)); 2121 if (!offset) { 2122 bpf_error("not enough core"); 2123 /*NOTREACHED*/ 2124 } 2125 } 2126 src = p->stmts; 2127 for (off = 0; off < slen && src; off++) { 2128 #if 0 2129 printf("off=%d src=%x\n", off, src); 2130 #endif 2131 offset[off] = src; 2132 src = src->next; 2133 } 2134 2135 off = 0; 2136 for (src = p->stmts; src; src = src->next) { 2137 if (src->s.code == NOP) 2138 continue; 2139 dst->code = (u_short)src->s.code; 2140 dst->k = src->s.k; 2141 2142 /* fill block-local relative jump */ 2143 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) { 2144 #if 0 2145 if (src->s.jt || src->s.jf) { 2146 bpf_error("illegal jmp destination"); 2147 /*NOTREACHED*/ 2148 } 2149 #endif 2150 goto filled; 2151 } 2152 if (off == slen - 2) /*???*/ 2153 goto filled; 2154 2155 { 2156 u_int i; 2157 int jt, jf; 2158 static const char ljerr[] = "%s for block-local relative jump: off=%d"; 2159 2160 #if 0 2161 printf("code=%x off=%d %x %x\n", src->s.code, 2162 off, src->s.jt, src->s.jf); 2163 #endif 2164 2165 if (!src->s.jt || !src->s.jf) { 2166 bpf_error(ljerr, "no jmp destination", off); 2167 /*NOTREACHED*/ 2168 } 2169 2170 jt = jf = 0; 2171 for (i = 0; i < slen; i++) { 2172 if (offset[i] == src->s.jt) { 2173 if (jt) { 2174 bpf_error(ljerr, "multiple matches", off); 2175 /*NOTREACHED*/ 2176 } 2177 2178 dst->jt = i - off - 1; 2179 jt++; 2180 } 2181 if (offset[i] == src->s.jf) { 2182 if (jf) { 2183 bpf_error(ljerr, "multiple matches", off); 2184 /*NOTREACHED*/ 2185 } 2186 dst->jf = i - off - 1; 2187 jf++; 2188 } 2189 } 2190 if (!jt || !jf) { 2191 bpf_error(ljerr, "no destination found", off); 2192 /*NOTREACHED*/ 2193 } 2194 } 2195 filled: 2196 ++dst; 2197 ++off; 2198 } 2199 if (offset) 2200 free(offset); 2201 2202 #ifdef BDEBUG 2203 bids[dst - fstart] = p->id + 1; 2204 #endif 2205 dst->code = (u_short)p->s.code; 2206 dst->k = p->s.k; 2207 if (JT(p)) { 2208 extrajmps = 0; 2209 off = JT(p)->offset - (p->offset + slen) - 1; 2210 if (off >= 256) { 2211 /* offset too large for branch, must add a jump */ 2212 if (p->longjt == 0) { 2213 /* mark this instruction and retry */ 2214 p->longjt++; 2215 return(0); 2216 } 2217 /* branch if T to following jump */ 2218 dst->jt = extrajmps; 2219 extrajmps++; 2220 dst[extrajmps].code = BPF_JMP|BPF_JA; 2221 dst[extrajmps].k = off - extrajmps; 2222 } 2223 else 2224 dst->jt = off; 2225 off = JF(p)->offset - (p->offset + slen) - 1; 2226 if (off >= 256) { 2227 /* offset too large for branch, must add a jump */ 2228 if (p->longjf == 0) { 2229 /* mark this instruction and retry */ 2230 p->longjf++; 2231 return(0); 2232 } 2233 /* branch if F to following jump */ 2234 /* if two jumps are inserted, F goes to second one */ 2235 dst->jf = extrajmps; 2236 extrajmps++; 2237 dst[extrajmps].code = BPF_JMP|BPF_JA; 2238 dst[extrajmps].k = off - 
extrajmps; 2239 } 2240 else 2241 dst->jf = off; 2242 } 2243 return (1); 2244 } 2245 2246 2247 /* 2248 * Convert flowgraph intermediate representation to the 2249 * BPF array representation. Set *lenp to the number of instructions. 2250 * 2251 * This routine does *NOT* leak the memory pointed to by fp. It *must 2252 * not* do free(fp) before returning fp; doing so would make no sense, 2253 * as the BPF array pointed to by the return value of icode_to_fcode() 2254 * must be valid - it's being returned for use in a bpf_program structure. 2255 * 2256 * If it appears that icode_to_fcode() is leaking, the problem is that 2257 * the program using pcap_compile() is failing to free the memory in 2258 * the BPF program when it's done - the leak is in the program, not in 2259 * the routine that happens to be allocating the memory. (By analogy, if 2260 * a program calls fopen() without ever calling fclose() on the FILE *, 2261 * it will leak the FILE structure; the leak is not in fopen(), it's in 2262 * the program.) Change the program to use pcap_freecode() when it's 2263 * done with the filter program. See the pcap man page. 2264 */ 2265 struct bpf_insn * 2266 icode_to_fcode(root, lenp) 2267 struct block *root; 2268 u_int *lenp; 2269 { 2270 u_int n; 2271 struct bpf_insn *fp; 2272 2273 /* 2274 * Loop doing convert_code_r() until no branches remain 2275 * with too-large offsets. 2276 */ 2277 while (1) { 2278 unMarkAll(); 2279 n = *lenp = count_stmts(root); 2280 2281 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n); 2282 if (fp == NULL) 2283 bpf_error("malloc"); 2284 memset((char *)fp, 0, sizeof(*fp) * n); 2285 fstart = fp; 2286 ftail = fp + n; 2287 2288 unMarkAll(); 2289 if (convert_code_r(root)) 2290 break; 2291 free(fp); 2292 } 2293 2294 return fp; 2295 } 2296 2297 /* 2298 * Make a copy of a BPF program and put it in the "fcode" member of 2299 * a "pcap_t". 2300 * 2301 * If we fail to allocate memory for the copy, fill in the "errbuf" 2302 * member of the "pcap_t" with an error message, and return -1; 2303 * otherwise, return 0. 2304 */ 2305 int 2306 install_bpf_program(pcap_t *p, struct bpf_program *fp) 2307 { 2308 size_t prog_size; 2309 2310 /* 2311 * Validate the program. 2312 */ 2313 if (!bpf_validate(fp->bf_insns, fp->bf_len)) { 2314 snprintf(p->errbuf, sizeof(p->errbuf), 2315 "BPF program is not valid"); 2316 return (-1); 2317 } 2318 2319 /* 2320 * Free up any already installed program. 2321 */ 2322 pcap_freecode(&p->fcode); 2323 2324 prog_size = sizeof(*fp->bf_insns) * fp->bf_len; 2325 p->fcode.bf_len = fp->bf_len; 2326 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size); 2327 if (p->fcode.bf_insns == NULL) { 2328 snprintf(p->errbuf, sizeof(p->errbuf), 2329 "malloc: %s", pcap_strerror(errno)); 2330 return (-1); 2331 } 2332 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size); 2333 return (0); 2334 } 2335 2336 #ifdef BDEBUG 2337 static void 2338 opt_dump(root) 2339 struct block *root; 2340 { 2341 struct bpf_program f; 2342 2343 memset(bids, 0, sizeof bids); 2344 f.bf_insns = icode_to_fcode(root, &f.bf_len); 2345 bpf_dump(&f, 1); 2346 putchar('\n'); 2347 free((char *)f.bf_insns); 2348 } 2349 #endif 2350
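
/*
 * Editor's usage sketch (illustrative only, not part of the original source,
 * and not compiled): how the entry points above typically fit together in a
 * caller, loosely modeled on what pcap_compile() does.  The function name
 * optimize_and_install() is hypothetical.  install_bpf_program() makes its
 * own copy of the instructions, so the array returned by icode_to_fcode()
 * can be freed afterwards; a program that keeps the bpf_program around
 * instead should release it with pcap_freecode() when done.
 */
#if 0
static int
optimize_and_install(pcap_t *p, struct block **rootp)
{
	struct bpf_program prog;
	int status;

	bpf_optimize(rootp);			/* optimize the flowgraph in place */
	prog.bf_insns = icode_to_fcode(*rootp, &prog.bf_len);
						/* flatten the DAG to a BPF array */
	status = install_bpf_program(p, &prog);	/* validate and copy into p->fcode */
	free((char *)prog.bf_insns);		/* install_bpf_program copied it */
	return status;
}
#endif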