/*	$OpenBSD: optimize.c,v 1.2 1996/03/04 15:47:24 mickey Exp $	*/
/*	$NetBSD: optimize.c,v 1.3 1995/04/29 05:42:28 cgd Exp $	*/

/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for tcpdump intermediate representation.
 */
#ifndef lint
static char rcsid[] =
    "@(#) Header: optimize.c,v 1.45 94/06/20 19:07:55 leres Exp (LBL)";
#endif

#include <sys/types.h>
#include <sys/time.h>

#include <net/bpf.h>

#include <stdio.h>
#ifdef __osf__
#include <stdlib.h>
#include <malloc.h>
#endif
#ifdef __NetBSD__
#include <stdlib.h>
#endif
#include <memory.h>

#include "gencode.h"

#ifndef __GNUC__
#define inline
#endif

#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

#define NOP -1

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)
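
/*
 * For illustration, the recursive graph walks below all use the same
 * idiom:
 *
 *	unMarkAll();
 *	...
 *	if (isMarked(p))
 *		return;
 *	Mark(p);
 *
 * so each node is visited at most once per pass, without ever having
 * to clear the individual mark fields.
 */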

static void opt_init(struct block *);
static void opt_cleanup(void);

static void make_marks(struct block *);
static void mark_code(struct block *);

static void intern_blocks(struct block *);

static int eq_slist(struct slist *, struct slist *);

static void find_levels_r(struct block *);

static void find_levels(struct block *);
static void find_dom(struct block *);
static void propedom(struct edge *);
static void find_edom(struct block *);
static void find_closure(struct block *);
static int atomuse(struct stmt *);
static int atomdef(struct stmt *);
static void compute_local_ud(struct block *);
static void find_ud(struct block *);
static void init_val(void);
static long F(int, long, long);
static inline void vstore(struct stmt *, long *, long, int);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static void or_pullup(struct block *);
static void and_pullup(struct block *);
static void opt_blks(struct block *, int);
static inline void link_inedge(struct edge *, struct block *);
static void find_inedges(struct block *);
static void opt_root(struct block **);
static void opt_loop(struct block *, int);
static void fold_op(struct stmt *, long, long);
static inline struct slist *this_op(struct slist *);
static void opt_not(struct block *);
static void opt_peep(struct block *);
static void opt_stmt(struct stmt *, long[], int);
static void deadstmt(struct stmt *, struct stmt *[]);
static void opt_deadstores(struct block *);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static struct block *fold_edge(struct block *, struct edge *);
static inline int eq_blk(struct block *, struct block *);
static int slength(struct slist *);
static int count_blocks(struct block *);
static void number_blks_r(struct block *);
static int count_stmts(struct block *);
static void convert_code_r(struct block *);

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
static int nodewords;
static int edgewords;
struct block **levels;
u_long *space;
#define BITS_PER_WORD (8*sizeof(u_long))
/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register u_long *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register u_long *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register u_long *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
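
/*
 * For example, on a machine with 32-bit u_longs (BITS_PER_WORD == 32),
 * SET_INSERT(p, 37) sets bit 37 % 32 == 5 of word p[37 / 32] == p[1],
 * and SET_MEMBER(p, 37) tests that same bit.
 */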

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(b)
	struct block *b;
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(root)
	struct block *root;
{
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(root)
	struct block *root;
{
	int i;
	struct block *b;
	u_long *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}
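
/*
 * Because blocks are visited from the root level downward, every
 * predecessor of a block has already intersected its set into that
 * block by the time the block itself is visited, so each block b ends
 * up with dom(b) = {b} plus the intersection of dom(p) over all of its
 * predecessors p.
 */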

static void
propedom(ep)
	struct edge *ep;
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(root)
	struct block *root;
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(root)
	struct block *root;
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)all_closure_sets, 0,
	      n_blocks * nodewords * sizeof(*all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, nodewords);
			SET_UNION(JF(b)->closure, b->closure, nodewords);
		}
	}
}

/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(s)
	struct stmt *s;
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(s)
	struct stmt *s;
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}

static void
compute_local_ud(b)
	struct block *b;
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
		use |= ATOMMASK(A_ATOM);

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}
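
/*
 * For example, a block whose only statement is 'tax' uses the
 * accumulator (in_use), defines X (def), and kills any X value flowing
 * in from above (kill), since X is overwritten before it is read.
 */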

/*
 * Assume graph is already leveled.
 */
static void
find_ud(root)
	struct block *root;
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	long v0, v1;
	long val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	long const_val;
};

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val()
{
	curval = 0;
	next_vnode = vnode_base;
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
	memset((char *)hashtbl, 0, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static long
F(code, v0, v1)
	int code;
	long v0, v1;
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		vmap[val].const_val = v0;
		vmap[val].is_const = 1;
	}
	p = next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = hashtbl[hash];
	hashtbl[hash] = p;

	return val;
}

static inline void
vstore(s, valp, newval, alter)
	struct stmt *s;
	long *valp;
	long newval;
	int alter;
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

static void
fold_op(s, v0, v1)
	struct stmt *s;
	long v0, v1;
{
	long a, b;

	a = vmap[v0].const_val;
	b = vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error("division by zero");
		a /= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	case BPF_NEG:
		a = -a;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	done = 0;
}

static inline struct slist *
this_op(s)
	struct slist *s;
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(b)
	struct block *b;
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}
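
/*
 * Peephole optimizations on the straight-line code of block 'b':
 * turn store/load pairs into register moves, fold constant loads into
 * following indexed loads, merge a trailing subtract into the block's
 * comparison and, when the accumulator is a known constant, decide the
 * branch outright.
 */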
static void
opt_peep(b)
	struct block *b;
{
	struct slist *s;
	struct slist *next, *last;
	int val;
	long v;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	while (1) {
		s = this_op(s);
		if (s == 0)
			break;
		next = this_op(s->next);
		if (next == 0)
			break;
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				break;

			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				break;

			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				break;

			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				break;
			/*
			 * XXX We need to check that X is not
			 * subsequently used.  We know we can eliminate the
			 * accumulator modifications since it is defined
			 * by the last stmt of this sequence.
			 *
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
		s = next;
	}
	/*
	 * If we have a subtract to do a comparison, and the X register
	 * is a known constant, we can merge this value into the
	 * comparison.
	 */
	if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		val = b->val[X_ATOM];
		if (vmap[val].is_const) {
			b->s.k += vmap[val].const_val;
			last->s.code = NOP;
			done = 0;
		} else if (b->s.k == 0) {
			/*
			 * sub x  ->	nop
			 * j  #0	j  x
			 */
			last->s.code = NOP;
			b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
				BPF_X;
			done = 0;
		}
	}
	/*
	 * Likewise, a constant subtract can be simplified.
	 */
	else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
		 !ATOMELEM(b->out_use, A_ATOM)) {
		b->s.k += last->s.k;
		last->s.code = NOP;
		done = 0;
	}
	/*
	 * and #k	nop
	 * jeq #0  ->	jset #k
	 */
	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
		b->s.k = last->s.k;
		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
		last->s.code = NOP;
		done = 0;
		opt_not(b);
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		v = vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = v > b->s.k;
			break;

		case BPF_JGE:
			v = v >= b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}

/*
 * Compute the symbolic value of the expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
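/*
 * For example, with 'alter' set, once value numbering has established
 * that X holds the constant 10, the indexed load
 *
 *	ldx	#10
 *	ld	[x + 2]
 *
 * is rewritten in place as the absolute load 'ld [12]'.
 */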
static void
opt_stmt(s, val, alter)
	struct stmt *s;
	long val[];
	int alter;
{
	int op;
	long v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				if (op == BPF_ADD || op == BPF_SUB ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, and -1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR ||
			    op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}

static void
deadstmt(s, last)
	register struct stmt *s;
	register struct stmt *last[];
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(b)
	register struct block *b;
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}
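
/*
 * Optimize block 'b': seed its value table from its predecessors, run
 * opt_stmt() over each statement, and either discard the statements
 * entirely (when nothing is used on exit and the accumulator is left
 * unchanged) or apply the peephole and dead-store passes.  Finally,
 * record the comparison operands for the branch optimizer.
 */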
static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
	struct slist *s;
	struct edge *p;
	int i;
	long aval;

	/*
	 * Initialize the atom values.
	 * If we have no predecessors, everything is undefined.
	 * Otherwise, we inherit our values from our predecessors.
	 * If any register has an ambiguous value (i.e. control paths are
	 * merging) give it the undefined value of 0.
	 */
	p = b->in_edges;
	if (p == 0)
		memset((char *)b->val, 0, sizeof(b->val));
	else {
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator with a value that is
	 * already there, eliminate all the statements.
	 */
	if (do_stmts && b->out_use == 0 && aval != 0 &&
	    b->val[A_ATOM] == aval)
		b->stmts = 0;
	else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands are identical, so the
		 * result is true if a true branch was
		 * taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.  We rely on the fact that
		 * distinct constants have distinct value numbers.
		 */
		return JF(child);

	return 0;
}
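
/*
 * Try to move the target of edge 'ep' forward.  If both branches of the
 * successor lead to the same place, or if some dominating edge already
 * determines how the successor's test must come out (fold_edge),
 * retarget 'ep' past the successor, provided this introduces no use
 * conflict.
 */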
static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		register u_long x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}
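
/*
 * or_pullup() and and_pullup() rearrange chains of blocks that share a
 * branch target with 'b'.  or_pullup() walks the chain linked through
 * false branches whose true branches all go to JT(b) (an OR of tests)
 * and pulls a block computing the same accumulator value as b's
 * predecessors up past blocks that compute a different value;
 * and_pullup() is the symmetric transformation for chains linked
 * through true branches (an AND of tests).  This gives later passes
 * another chance to merge equivalent tests.
 */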
static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

static inline void
link_inedge(parent, child)
	struct edge *parent;
	struct block *child;
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(root)
	struct block *root;
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

static void
opt_root(b)
	struct block **b;
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;
}

static void
opt_loop(root, do_stmts)
	struct block *root;
	int do_stmts;
{

#ifdef BDEBUG
	if (dflag > 1)
		opt_dump(root);
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_inedges(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1)
			opt_dump(root);
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
	opt_root(rootp);
	opt_cleanup();
}

static void
make_marks(p)
	struct block *p;
{
	if (!isMarked(p)) {
		Mark(p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(JT(p));
			make_marks(JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(i) is true
 * only for nodes that are alive.
 */
static void
mark_code(p)
	struct block *p;
{
	cur_mark += 1;
	make_marks(p);
}

/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

static inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}
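
/*
 * Merge equivalent blocks: mark the live blocks, link each block to an
 * equal block (eq_blk) that appears later in the 'blocks' array, then
 * redirect every branch to the chosen representative, repeating until
 * no more edges move.
 */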
static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done;
 top:
	done = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done)
		goto top;
}

static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static int
slength(s)
	struct slist *s;
{
	int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(p)
	struct block *p;
{
	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return;

	Mark(p);
	n = n_blocks++;
	p->id = n;
	blocks[n] = p;

	number_blks_r(JT(p));
	number_blks_r(JF(p));
}

/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 */
static int
count_stmts(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	n = count_stmts(JT(p)) + count_stmts(JF(p));
	return slength(p->stmts) + n + 1;
}

/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(root)
	struct block *root;
{
	u_long *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)malloc(n * sizeof(*blocks));
	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);

	n_edges = 2 * n_blocks;
	edges = (struct edge **)malloc(n_edges * sizeof(*edges));

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)malloc(n_blocks * sizeof(*levels));

	edgewords = n_edges / (8 * sizeof(u_long)) + 1;
	nodewords = n_blocks / (8 * sizeof(u_long)) + 1;

	/* XXX */
	space = (u_long *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
	vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
}

/*
 * Some pointers used to convert the basic block form of the code,
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
int bids[1000];
#endif
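
/*
 * Emit the code for block 'p' and everything it reaches into the
 * instruction array, filling the array backwards from 'ftail' so that
 * each block is laid out before the blocks it jumps to.  Branch offsets
 * are computed here; a jump of 256 or more instructions is rejected.
 */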
static void
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;

	if (p == 0 || isMarked(p))
		return;
	Mark(p);

	convert_code_r(JF(p));
	convert_code_r(JT(p));

	slen = slength(p->stmts);
	dst = ftail -= slen + 1;

	p->offset = dst - fstart;

	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;
		++dst;
	}
#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256)
			bpf_error("long jumps not supported");
		dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256)
			bpf_error("long jumps not supported");
		dst->jf = off;
	}
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 */
struct bpf_insn *
icode_to_fcode(root, lenp)
	struct block *root;
	int *lenp;
{
	int n;
	struct bpf_insn *fp;

	unMarkAll();
	n = *lenp = count_stmts(root);

	fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
	memset((char *)fp, 0, sizeof(*fp) * n);
	fstart = fp;
	ftail = fp + n;

	unMarkAll();
	convert_code_r(root);

	return fp;
}

#ifdef BDEBUG
opt_dump(root)
	struct block *root;
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif