/*	$NetBSD: optimize.c,v 1.9 2017/01/24 22:29:28 christos Exp $	*/

/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for tcpdump intermediate representation.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: optimize.c,v 1.9 2017/01/24 22:29:28 christos Exp $");

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef _WIN32
#include <pcap-stdinc.h>
#else /* _WIN32 */
#if HAVE_INTTYPES_H
#include <inttypes.h>
#elif HAVE_STDINT_H
#include <stdint.h>
#endif
#ifdef HAVE_SYS_BITYPES_H
#include <sys/bitypes.h>
#endif
#include <sys/types.h>
#endif /* _WIN32 */

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>

#include <errno.h>

#include "pcap-int.h"

#include "gencode.h"

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
int pcap_optimizer_debug;
#endif

#if defined(MSDOS) && !defined(__DJGPP__)
extern int _w32_ffs(int mask);
#define ffs _w32_ffs
#endif

/*
 * So is the check for _MSC_VER done because MinGW has this?
 */
#if defined(_WIN32) && defined (_MSC_VER)
/*
 * ffs -- vax ffs instruction
 *
 * XXX - with versions of VS that have it, use _BitScanForward()?
 */
static int
ffs(int mask)
{
	int bit;

	if (mask == 0)
		return(0);
	for (bit = 1; !(mask & 1); bit++)
		mask >>= 1;
	return(bit);
}
#endif

/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * locations.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
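 *
 * A small illustrative example (not a transformation quoted verbatim from
 * this file): if every path into a block has executed "ld #1; st M[0]",
 * then M[0] carries the same value number on entry along each path, and a
 * later "ld M[0]" in that block can be treated as loading the constant 1.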
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

struct _opt_state {
	/*
	 * A flag to indicate that further optimization is needed.
	 * Iterative passes are continued until a given pass yields no
	 * branch movement.
	 */
	int done;

	int n_blocks;
	struct block **blocks;
	int n_edges;
	struct edge **edges;

	/*
	 * A bit vector set representation of the dominators.
	 * We round up the set size to the next power of two.
	 */
	int nodewords;
	int edgewords;
	struct block **levels;
	bpf_u_int32 *space;

#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}

	uset all_dom_sets;
	uset all_closure_sets;
	uset all_edge_sets;

#define MODULUS 213
	struct valnode *hashtbl[MODULUS];
	int curval;
	int maxval;

	struct vmapinfo *vmap;
	struct valnode *vnode_base;
	struct valnode *next_vnode;
};

typedef struct {
	/*
	 * Some pointers used to convert the basic block form of the code
	 * into the array form that BPF requires.  'fstart' will point to
	 * the malloc'd array while 'ftail' is used during the recursive
	 * traversal.
	 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
} conv_state_t;

static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
static void opt_cleanup(opt_state_t *);

static void intern_blocks(opt_state_t *, struct icode *);

static void find_inedges(opt_state_t *, struct block *);
#ifdef BDEBUG
static void opt_dump(compiler_state_t *, struct icode *);
#endif

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
{
	int level;

	if (isMarked(ic, b))
		return;

	Mark(ic, b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(opt_state, ic, JT(b));
		find_levels_r(opt_state, ic, JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = opt_state->levels[level];
	opt_state->levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.
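 *
 * A leaf here is a block with no successors (a return block) and gets
 * level 0; find_levels_r() gives every other block a level one greater
 * than the larger of its two successors' levels.
 *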
 * The opt_state->levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(opt_state_t *opt_state, struct icode *ic)
{
	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
	unMarkAll(ic);
	find_levels_r(opt_state, ic, ic->root);
}

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = opt_state->all_dom_sets;
	i = opt_state->n_blocks * opt_state->nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = opt_state->nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
		}
	}
}

static void
propedom(opt_state_t *opt_state, struct edge *ep)
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
	int i;
	uset x;
	struct block *b;

	x = opt_state->all_edge_sets;
	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			propedom(opt_state, &b->et);
			propedom(opt_state, &b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)opt_state->all_closure_sets, 0,
	    opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
		}
	}
}

/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
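 *
 * For example, "ld M[3]" (BPF_LD|BPF_MEM) uses scratch location 3, so 3 is
 * returned; "add x" (BPF_ALU with a BPF_X source) uses both registers, so
 * AX_ATOM is returned; "ld #5" (BPF_LD|BPF_IMM) uses no register, so -1 is
 * returned.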
 */
static int
atomuse(struct stmt *s)
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(struct stmt *s)
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}

/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
static void
compute_local_ud(struct block *b)
{
	struct slist *s;
	atomset def = 0, use = 0, killed = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				killed |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
	}

	b->def = def;
	b->kill = killed;
	b->in_use = use;
}

/*
 * Assume graph is already leveled.
 */
static void
find_ud(opt_state_t *opt_state, struct block *root)
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
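	 *
	 * The second pass below then propagates the information from
	 * successors to predecessors: a block's out_use is the union of
	 * its successors' in_use sets, and whatever is in out_use but
	 * not killed locally is added to the block's own in_use set.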
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}

static void
init_val(opt_state_t *opt_state)
{
	opt_state->curval = 0;
	opt_state->next_vnode = opt_state->vnode_base;
	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(opt_state_t *opt_state, int code, int v0, int v1)
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = opt_state->hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++opt_state->curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		opt_state->vmap[val].const_val = v0;
		opt_state->vmap[val].is_const = 1;
	}
	p = opt_state->next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = opt_state->hashtbl[hash];
	opt_state->hashtbl[hash] = p;

	return val;
}

static inline void
vstore(struct stmt *s, int *valp, int newval, int alter)
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 */
static void
fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct stmt *s, int v0, int v1)
{
	bpf_u_int32 a, b;

	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error(cstate, "division by zero");
		a /= b;
		break;

	case BPF_MOD:
		if (b == 0)
			bpf_error(cstate, "modulus by zero");
		a %= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_XOR:
		a ^= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	opt_state->done = 0;
}

static inline struct slist *
this_op(struct slist *s)
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(struct block *b)
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(opt_state_t *opt_state, struct block *b)
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	for (/*empty*/; /*empty*/; s = next) {
		/*
		 * Skip over nops.
		 */
		s = this_op(s);
		if (s == 0)
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one
		 * (skipping nops).
		 */
		next = this_op(s->next);
		if (next == 0)
			break;	/* no next instruction */
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			opt_state->done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			opt_state->done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2	{s}
			 * (005) ldxms   [14]	{next}  -- optional
			 * (006) addx		{add}
			 * (007) tax		{tax}
			 * (008) ild     [x+0]	{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			opt_state->done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (opt_state->vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison:
				 *
				 * sub x	->	nop
				 * jeq #y		jeq #(x+y)
				 */
				b->s.k += opt_state->vmap[val].const_val;
				last->s.code = NOP;
				opt_state->done = 0;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 *
				 * sub x	->	nop
				 * jeq #0		jeq x
				 */
				last->s.code = NOP;
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
				opt_state->done = 0;
			}
		}
		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 * sub #x	->	nop
		 * jeq #y	->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
			last->s.code = NOP;
			b->s.k += last->s.k;
			opt_state->done = 0;
		}
		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 *
		 * and #k	nop
		 * jeq #0  ->	jset #k
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
		    b->s.k == 0) {
			b->s.k = last->s.k;
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
			last->s.code = NOP;
			opt_state->done = 0;
			opt_not(b);
		}
	}
	/*
	 * jset #0        ->   never
	 * jset #ffffffff ->   always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if (b->s.k == 0)
			JT(b) = JF(b);
		if ((u_int)b->s.k == 0xffffffffU)
			JF(b) = JT(b);
	}
	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		b->s.code &= ~BPF_X;
		b->s.k = v;
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > (unsigned)b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= (unsigned)b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			opt_state->done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}

/*
 * Compute the symbolic value of the expression 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
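 *
 * For example, with 'alter' set, the sequence "ld #4; add #2" is folded:
 * fold_op() rewrites the add as "ld #6", and the accumulator's value
 * number becomes that of the constant 6.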
 */
static void
opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct stmt *s, int val[], int alter)
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += opt_state->vmap[v].const_val;
			v = F(opt_state, s->code, s->k, 0L);
			opt_state->done = 0;
		}
		else
			v = F(opt_state, s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(opt_state, s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -opt_state->vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_MOD|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_XOR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/* don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR || op == BPF_XOR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_MOD|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_XOR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = opt_state->vmap[val[X_ATOM]].const_val;
				opt_state->done = 0;
				val[A_ATOM] =
					F(opt_state, s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.
		 * This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && opt_state->vmap[val[A_ATOM]].is_const
		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
			    op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}

static void
deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			opt_state->done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(opt_state_t *opt_state, register struct block *b)
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(opt_state, &s->s, last);
	deadstmt(opt_state, &b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			opt_state->done = 0;
		}
}

static void
opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct block *b, int do_stmts)
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
	      xval != 0 && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			opt_state->done = 0;
		}
	} else {
		opt_peep(opt_state, b);
		opt_deadstores(opt_state, b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
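 *
 * opt_j() uses this as a safety check before retargeting an edge out of
 * 'b': the edge may bypass intermediate blocks only if every register
 * used on exit from the new successor has the same value in 'b' as it
 * does along the original path.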
 */
static int
use_conflict(struct block *b, struct block *succ)
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(struct block *child, struct edge *ep)
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}

static void
opt_j(opt_state_t *opt_state, struct edge *ep)
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			opt_state->done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < opt_state->edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, opt_state->edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				opt_state->done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}


static void
or_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}

static void
and_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}

static void
opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{
	int i, maxlevel;
	struct block *p;

	init_val(opt_state);
	maxlevel = ic->root->level;

	find_inedges(opt_state, ic->root);
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link)
			opt_blk(cstate, ic, opt_state, p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			opt_j(opt_state, &p->et);
			opt_j(opt_state, &p->ef);
		}
	}

	find_inedges(opt_state, ic->root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			or_pullup(opt_state, p);
			and_pullup(opt_state, p);
		}
	}
}

static inline void
link_inedge(struct edge *parent, struct block *child)
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

static void
opt_root(struct block **b)
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}

static void
opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{

#ifdef BDEBUG
	if (pcap_optimizer_debug > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(cstate, ic);
	}
#endif
	do {
		opt_state->done = 1;
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(cstate, opt_state, ic, do_stmts);
#ifdef BDEBUG
		if (pcap_optimizer_debug > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(cstate, ic);
		}
#endif
	} while (!opt_state->done);
}

/*
 * Optimize the filter code in its dag representation.
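 *
 * The work is done as two opt_loop() passes (the first only moves
 * branches, the second also rewrites statements), followed by merging
 * of identical blocks in intern_blocks() and a final opt_root() pass
 * that trims useless statements and jumps at the root.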
 */
void
bpf_optimize(compiler_state_t *cstate, struct icode *ic)
{
	opt_state_t opt_state;

	opt_init(cstate, &opt_state, ic);
	opt_loop(cstate, &opt_state, ic, 0);
	opt_loop(cstate, &opt_state, ic, 1);
	intern_blocks(&opt_state, ic);
#ifdef BDEBUG
	if (pcap_optimizer_debug > 1) {
		printf("after intern_blocks()\n");
		opt_dump(cstate, ic);
	}
#endif
	opt_root(&ic->root);
#ifdef BDEBUG
	if (pcap_optimizer_debug > 1) {
		printf("after opt_root()\n");
		opt_dump(cstate, ic);
	}
#endif
	opt_cleanup(&opt_state);
}

static void
make_marks(struct icode *ic, struct block *p)
{
	if (!isMarked(ic, p)) {
		Mark(ic, p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(ic, JT(p));
			make_marks(ic, JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(ic->cur_mark, i) is true
 * only for nodes that are alive.
 */
static void
mark_code(struct icode *ic)
{
	ic->cur_mark += 1;
	make_marks(ic, ic->root);
}

/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(struct slist *x, struct slist *y)
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

static inline int
eq_blk(struct block *b0, struct block *b1)
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

static void
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	mark_code(ic);

	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))
			continue;
		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))
				continue;
			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}

static void
opt_cleanup(opt_state_t *opt_state)
{
	free((void *)opt_state->vnode_base);
	free((void *)opt_state->vmap);
	free((void *)opt_state->edges);
	free((void *)opt_state->space);
	free((void *)opt_state->levels);
	free((void *)opt_state->blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static u_int
slength(struct slist *s)
{
	u_int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
{
	int n;

	if (p == 0 || isMarked(ic, p))
		return;

	Mark(ic, p);
	n = opt_state->n_blocks++;
	p->id = n;
	opt_state->blocks[n] = p;

	number_blks_r(opt_state, ic, JT(p));
	number_blks_r(opt_state, ic, JF(p));
}

/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
 */
static u_int
count_stmts(struct icode *ic, struct block *p)
{
	u_int n;

	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}

/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll(ic);
	n = count_blocks(ic, ic->root);
	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
	if (opt_state->blocks == NULL)
		bpf_error(cstate, "malloc");
	unMarkAll(ic);
	opt_state->n_blocks = 0;
	number_blks_r(opt_state, ic, ic->root);

	opt_state->n_edges = 2 * opt_state->n_blocks;
	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
	if (opt_state->edges == NULL)
		bpf_error(cstate, "malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
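	 *
	 * (For sizing purposes: the dominator, closure and edge sets built
	 * below are bit vectors; nodewords and edgewords round n_blocks and
	 * n_edges up to whole 32-bit words, e.g. 40 blocks need
	 * 40/32 + 1 = 2 words per node set.)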
	 */
	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
	if (opt_state->levels == NULL)
		bpf_error(cstate, "malloc");

	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/* XXX */
	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
	    + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
	if (opt_state->space == NULL)
		bpf_error(cstate, "malloc");
	p = opt_state->space;
	opt_state->all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->dom = p;
		p += opt_state->nodewords;
	}
	opt_state->all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->closure = p;
		p += opt_state->nodewords;
	}
	opt_state->all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = opt_state->blocks[i];

		b->et.edom = p;
		p += opt_state->edgewords;
		b->ef.edom = p;
		p += opt_state->edgewords;
		b->et.id = i;
		opt_state->edges[i] = &b->et;
		b->ef.id = opt_state->n_blocks + i;
		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	opt_state->maxval = 3 * max_stmts;
	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
	if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
		bpf_error(cstate, "malloc");
}

/*
 * This is only used when supporting optimizer debugging.  It is
 * global state, so do *not* do more than one compile in parallel
 * and expect it to provide meaningful information.
 */
#ifdef BDEBUG
int bids[1000];
#endif

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
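 *
 * "Too large" means the target does not fit in the 8-bit jt/jf field of
 * a conditional branch (offset >= 256); the retry pass then emits an
 * unconditional BPF_JA long jump after the branch and points the branch
 * at that instead.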
 */
static int
convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
    struct icode *ic, struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	u_int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))
		return (1);
	Mark(ic, p);

	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
		return (0);
	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error(cstate, "not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error(cstate, "illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		u_int i;
		int jt, jf;
		static const char ljerr[] = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(cstate, ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(cstate, ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - conv_state->fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjt == 0) {
				/* mark this instruction and retry */
				p->longjt++;
				return(0);
			}
			/* branch if T to following jump */
			dst->jt = extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
			/* offset too large for branch, must add a jump */
			if (p->longjf == 0) {
				/* mark this instruction and retry */
				p->longjf++;
				return(0);
			}
			/* branch if F to following jump */
			/* if two jumps are inserted, F goes to second one */
			dst->jf = extrajmps;
			extrajmps++;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
		}
		else
			dst->jf = off;
	}
	return (1);
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
struct bpf_insn *
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)
{
	u_int n;
	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	while (1) {
		unMarkAll(ic);
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL)
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		unMarkAll(ic);
		if (convert_code_r(cstate, &conv_state, ic, root))
			break;
		free(fp);
	}

	return fp;
}

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
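	 *
	 * (This also means install_bpf_program() can safely be called more
	 * than once on the same pcap_t: the previous copy is released here
	 * before the new one is installed.)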
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "malloc: %s", pcap_strerror(errno));
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}

#ifdef BDEBUG
static void
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)
{
	int icount, noffset;
	int i;

	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != 0)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
	fprintf(out, "\"");
	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);
}

static void
dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
{
	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	if (JT(block)) {
		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
		    block->id, JT(block)->id);
		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
		    block->id, JF(block)->id);
	}
	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
}

/* Output the block CFG using the graphviz/DOT language.
 * In the CFG, each block's code, the value index of each register at block
 * exit, and the jump relationships are shown.
 *
 * Example DOT output for the BPF filter `ip src host 1.1.1.1' is:
digraph BPF {
	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
	"block0":se -> "block1":n [label="T"];
	"block0":sw -> "block3":n [label="F"];
	"block1":se -> "block2":n [label="T"];
	"block1":sw -> "block3":n [label="F"];
}
 *
 * After installing graphviz (from http://www.graphviz.org/), save the output
 * as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
static void
dot_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;
	FILE *out = stdout;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	fprintf(out, "digraph BPF {\n");
	ic->cur_mark = 0;
	unMarkAll(ic);
	dot_dump_node(ic, ic->root, &f, out);
	ic->cur_mark = 0;
	unMarkAll(ic);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);
}

static void
plain_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}

static void
opt_dump(compiler_state_t *cstate, struct icode *ic)
{
	/*
	 * If optimizer debugging is enabled, output a DOT graph.
	 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the
	 * -d/-dd/-ddd convention of the tcpdump command line.
	 */
	if (pcap_optimizer_debug > 3)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);
}
#endif