/*	$OpenBSD: optimize.c,v 1.13 2007/09/02 15:19:18 deraadt Exp $	*/

/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *  Optimization module for tcpdump intermediate representation.
 */

#include <sys/types.h>
#include <sys/time.h>

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>

#include "pcap-int.h"

#include "gencode.h"

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
extern int dflag;
#endif

#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

#define NOP -1

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)

static void opt_init(struct block *);
static void opt_cleanup(void);

static void make_marks(struct block *);
static void mark_code(struct block *);

static void intern_blocks(struct block *);

static int eq_slist(struct slist *, struct slist *);

static void find_levels_r(struct block *);

static void find_levels(struct block *);
static void find_dom(struct block *);
static void propedom(struct edge *);
static void find_edom(struct block *);
static void find_closure(struct block *);
static int atomuse(struct stmt *);
static int atomdef(struct stmt *);
static void compute_local_ud(struct block *);
static void find_ud(struct block *);
static void init_val(void);
static int F(int, int, int);
static __inline void vstore(struct stmt *, int *, int, int);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static void or_pullup(struct block *);
static void and_pullup(struct block *);
static void opt_blks(struct block *, int);
static __inline void link_inedge(struct edge *, struct block *);
static void find_inedges(struct block *);
static void opt_root(struct block **);
static void opt_loop(struct block *, int);
static void fold_op(struct stmt *, int, int);
static __inline struct slist *this_op(struct slist *);
static void opt_not(struct block *);
static void opt_peep(struct block *);
static void opt_stmt(struct stmt *, int[], int);
static void deadstmt(struct stmt *, struct stmt *[]);
static void opt_deadstores(struct block *);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static struct block *fold_edge(struct block *, struct edge *);
static __inline int eq_blk(struct block *, struct block *);
static int slength(struct slist *);
static int count_blocks(struct block *);
static void number_blks_r(struct block *);
static int count_stmts(struct block *);
static int convert_code_r(struct block *);
#ifdef BDEBUG
static void opt_dump(struct block *);
#endif

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
static int nodewords;
static int edgewords;
struct block **levels;
bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif
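/*
 * Illustrative example (not part of the original code): with 32-bit
 * words BITS_PER_WORD is 32, so block or edge id 37 lives in word
 * 37 / 32 == 1 at bit 37 % 32 == 5.  A dominator test such as
 *
 *	if (SET_MEMBER(b->dom, 3))
 *		...		-- block 3 dominates b
 *
 * therefore costs only an index, a shift and a mask.
 */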
static void
find_levels_r(b)
	struct block *b;
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(root)
	struct block *root;
{
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(root)
	struct block *root;
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}
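/*
 * Illustrative example (not part of the original code): for the
 * diamond r -> {x, y} -> z, one top-down pass over the levels gives
 *
 *	dom(r) = {r}		dom(x) = {r, x}
 *	dom(y) = {r, y}		dom(z) = {r, z}
 *
 * since z keeps only the intersection of its predecessors' sets.
 */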
static void
propedom(ep)
	struct edge *ep;
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(root)
	struct block *root;
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(root)
	struct block *root;
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)all_closure_sets, 0,
	      n_blocks * nodewords * sizeof(*all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, nodewords);
			SET_UNION(JF(b)->closure, b->closure, nodewords);
		}
	}
}

/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(s)
	struct stmt *s;
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(s)
	struct stmt *s;
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}

static void
compute_local_ud(b)
	struct block *b;
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
		use |= ATOMMASK(A_ATOM);

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}
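/*
 * Illustrative example (not part of the original code): for a block
 * whose statements are
 *
 *	ldx M[5]	-- uses atom 5, defines X
 *	add x		-- uses A and X, defines A
 *	st  M[2]	-- uses A, defines (and kills) atom 2
 *
 * compute_local_ud() yields in_use = {A, M[5]} and kill = {M[2]};
 * M[2] is killed because it is defined before any use in this block.
 */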
/*
 * Assume graph is already leveled.
 */
static void
find_ud(root)
	struct block *root;
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val()
{
	curval = 0;
	next_vnode = vnode_base;
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
	memset((char *)hashtbl, 0, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(code, v0, v1)
	int code;
	int v0, v1;
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		vmap[val].const_val = v0;
		vmap[val].is_const = 1;
	}
	p = next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = hashtbl[hash];
	hashtbl[hash] = p;

	return val;
}

static __inline void
vstore(s, valp, newval, alter)
	struct stmt *s;
	int *valp;
	int newval;
	int alter;
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}
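/*
 * Illustrative example (not part of the original code): F() hands out
 * one value number per distinct (code, v0, v1) triple, so every
 * "ld #22" in the program maps to the same number, and K(22) marks
 * that number as the constant 22 in vmap[].  If that value is later
 * stored with "st M[1]" and reloaded with "ld M[1]", opt_stmt() can
 * look the constant up again and rewrite the load as "ld #22".
 */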
static void
fold_op(s, v0, v1)
	struct stmt *s;
	int v0, v1;
{
	bpf_int32 a, b;

	a = vmap[v0].const_val;
	b = vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error("division by zero");
		a /= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	case BPF_NEG:
		a = -a;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	done = 0;
}

static __inline struct slist *
this_op(s)
	struct slist *s;
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(b)
	struct block *b;
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(b)
	struct block *b;
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	while (1) {
		s = this_op(s);
		if (s == 0)
			break;
		next = this_op(s->next);
		if (next == 0)
			break;
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				break;

			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				break;

			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				break;

			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				break;
			/*
			 * XXX We need to check that X is not
			 * subsequently used.  We know we can eliminate the
			 * accumulator modifications since it is defined
			 * by the last stmt of this sequence.
			 *
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
		s = next;
	}
	/*
	 * If we have a subtract to do a comparison, and the X register
	 * is a known constant, we can merge this value into the
	 * comparison.
	 */
	if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		val = b->val[X_ATOM];
		if (vmap[val].is_const) {
			int op;

			b->s.k += vmap[val].const_val;
			op = BPF_OP(b->s.code);
			if (op == BPF_JGT || op == BPF_JGE) {
				struct block *t = JT(b);
				JT(b) = JF(b);
				JF(b) = t;
				b->s.k += 0x80000000;
			}
			last->s.code = NOP;
			done = 0;
		} else if (b->s.k == 0) {
			/*
			 * sub x  ->	nop
			 * j  #0	j  x
			 */
			last->s.code = NOP;
			b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
				BPF_X;
			done = 0;
		}
	}
	/*
	 * Likewise, a constant subtract can be simplified.
	 */
	else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		int op;

		b->s.k += last->s.k;
		last->s.code = NOP;
		op = BPF_OP(b->s.code);
		if (op == BPF_JGT || op == BPF_JGE) {
			struct block *t = JT(b);
			JT(b) = JF(b);
			JF(b) = t;
			b->s.k += 0x80000000;
		}
		done = 0;
	}
	/*
	 * and #k	nop
	 * jeq #0  ->	jset #k
	 */
	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
		b->s.k = last->s.k;
		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
		last->s.code = NOP;
		done = 0;
		opt_not(b);
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}
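/*
 * Illustrative example (not part of the original code): with 'alter'
 * set, a sequence such as
 *
 *	ld  #2
 *	add #3
 *
 * is rewritten by opt_stmt()/fold_op() so that the add becomes
 * "ld #5"; the now-unused "ld #2" is then removed as a dead store
 * by a later pass.
 */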
/*
 * Compute the symbolic value of the expression 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
static void
opt_stmt(s, val, alter)
	struct stmt *s;
	int val[];
	int alter;
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				if (op == BPF_ADD || op == BPF_SUB ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, and -1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR ||
			    op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}
static void
deadstmt(s, last)
	register struct stmt *s;
	register struct stmt *last[];
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(b)
	register struct block *b;
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}
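/*
 * Illustrative example (not part of the original code): in
 *
 *	st M[3]
 *	ld #1
 *	st M[3]
 *
 * the first store is NOP'd by deadstmt() because atom 3 is redefined
 * before it is used, and the second store is NOP'd by opt_deadstores()
 * if M[3] is not in the block's out_use set.
 */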
static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 * If we have no predecessors, everything is undefined.
	 * Otherwise, we inherit our values from our predecessors.
	 * If any register has an ambiguous value (i.e. control paths are
	 * merging) give it the undefined value of 0.
	 */
	p = b->in_edges;
	if (p == 0)
		memset((char *)b->val, 0, sizeof(b->val));
	else {
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator with a value that is
	 * already there, or if this block is a return,
	 * eliminate all the statements.
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			done = 0;
		}
	} else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ', has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands are identical, so the
		 * result is true if a true branch was
		 * taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.  We rely on the fact that
		 * distinct constants have distinct value numbers.
		 */
		return JF(child);

	return 0;
}
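/*
 * Illustrative example (not part of the original code): if an edge
 * dominator of this edge performed "jeq #0x800" on the same A value
 * and took its true branch, a child performing the identical test
 * must also take the true branch, so fold_edge() lets opt_j() retarget
 * the edge straight to JT(child); the equality case can likewise rule
 * out the true branch when a different constant was matched earlier.
 */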
static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}
static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

static __inline void
link_inedge(parent, child)
	struct edge *parent;
	struct block *child;
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}
static void
find_inedges(root)
	struct block *root;
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

static void
opt_root(b)
	struct block **b;
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}

static void
opt_loop(root, do_stmts)
	struct block *root;
	int do_stmts;
{

#ifdef BDEBUG
	if (dflag > 1)
		opt_dump(root);
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_inedges(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1)
			opt_dump(root);
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
	opt_root(rootp);
	opt_cleanup();
}

static void
make_marks(p)
	struct block *p;
{
	if (!isMarked(p)) {
		Mark(p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(JT(p));
			make_marks(JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(i) is true
 * only for nodes that are alive.
 */
static void
mark_code(p)
	struct block *p;
{
	cur_mark += 1;
	make_marks(p);
}

/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

static __inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}
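/*
 * Illustrative example (not part of the original code): two live
 * blocks that both test "jeq #0x16" with identical (NOP-free)
 * statement lists and identical true/false successors compare equal
 * under eq_blk(), so intern_blocks() below links one to the other and
 * redirects every branch to the surviving copy.
 */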
static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done;
 top:
	done = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done)
		goto top;
}

static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static int
slength(s)
	struct slist *s;
{
	int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(p)
	struct block *p;
{
	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return;

	Mark(p);
	n = n_blocks++;
	p->id = n;
	blocks[n] = p;

	number_blks_r(JT(p));
	number_blks_r(JF(p));
}

/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 */
static int
count_stmts(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	n = count_stmts(JT(p)) + count_stmts(JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}
/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(root)
	struct block *root;
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)calloc(n, sizeof(*blocks));
	if (blocks == NULL)
		bpf_error("malloc");

	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);

	n_edges = 2 * n_blocks;
	edges = (struct edge **)calloc(n_edges, sizeof(*edges));
	if (edges == NULL)
		bpf_error("malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)calloc(n_blocks, sizeof(*levels));
	if (levels == NULL)
		bpf_error("malloc");

	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/* XXX */
	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	if (space == NULL)
		bpf_error("malloc");

	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
	vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
	if (vmap == NULL || vnode_base == NULL)
		bpf_error("malloc");
}

/*
 * Some pointers used to convert the basic block form of the code,
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
int bids[1000];
#endif
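/*
 * Illustrative example (not part of the original code): the jt/jf
 * fields of a conditional are 8 bits wide, so a true target 300
 * instructions ahead cannot be encoded directly.  convert_code_r()
 * below then emits
 *
 *	jeq ...  jt 0  jf ...		-- branch to the following insn
 *	ja  299				-- long jump to the real target
 *
 * after bumping p->longjt and restarting, which is why count_stmts()
 * reserves the extra slots.
 */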
/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(p))
		return (1);
	Mark(p);

	if (convert_code_r(JF(p)) == 0)
		return (0);
	if (convert_code_r(JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = dst - fstart;

	/* generate offset[] for convenience  */
	if (slen) {
		offset = calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error("not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error("illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		int i;
		int jt, jf;
		char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjt == 0) {
				/* mark this instruction and retry */
				p->longjt++;
				return(0);
			}
			/* branch if T to following jump */
			dst->jt = extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjf == 0) {
				/* mark this instruction and retry */
				p->longjf++;
				return(0);
			}
			/* branch if F to following jump */
			/* if two jumps are inserted, F goes to second one */
			dst->jf = extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jf = off;
	}
	return (1);
}
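/*
 * Typical usage (illustrative, not part of the original code): a
 * caller such as pcap_compile() optimizes the generated flowgraph
 * and then flattens it, roughly
 *
 *	bpf_optimize(&root);
 *	program.bf_insns = icode_to_fcode(root, &program.bf_len);
 *
 * icode_to_fcode() is defined below.
 */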
/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 */
struct bpf_insn *
icode_to_fcode(root, lenp)
	struct block *root;
	int *lenp;
{
	int n;
	struct bpf_insn *fp;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	while (1) {
		unMarkAll();
		n = *lenp = count_stmts(root);

		fp = calloc(n, sizeof(*fp));
		if (fp == NULL)
			bpf_error("calloc");

		fstart = fp;
		ftail = fp + n;

		unMarkAll();
		if (convert_code_r(root))
			break;
		free(fp);
	}

	return fp;
}

#ifdef BDEBUG
static void
opt_dump(root)
	struct block *root;
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif