1 /*	$OpenBSD: optimize.c,v 1.6 1999/07/20 04:49:55 deraadt Exp $	*/
2 
3 /*
4  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that: (1) source code distributions
9  * retain the above copyright notice and this paragraph in its entirety, (2)
10  * distributions including binary code include the above copyright notice and
11  * this paragraph in its entirety in the documentation or other materials
12  * provided with the distribution, and (3) all advertising materials mentioning
13  * features or use of this software display the following acknowledgement:
14  * ``This product includes software developed by the University of California,
15  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16  * the University nor the names of its contributors may be used to endorse
17  * or promote products derived from this software without specific prior
18  * written permission.
19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22  *
23  *  Optimization module for tcpdump intermediate representation.
24  */
25 #ifndef lint
26 static const char rcsid[] =
27     "@(#) $Header: /home/cvs/src/lib/libpcap/optimize.c,v 1.6 1999/07/20 04:49:55 deraadt Exp $ (LBL)";
28 #endif
29 
30 #include <sys/types.h>
31 #include <sys/time.h>
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <memory.h>
36 
37 #include "pcap-int.h"
38 
39 #include "gencode.h"
40 
41 #ifdef HAVE_OS_PROTO_H
42 #include "os-proto.h"
43 #endif
44 
45 #ifdef BDEBUG
46 extern int dflag;
47 #endif
48 
49 #define A_ATOM BPF_MEMWORDS
50 #define X_ATOM (BPF_MEMWORDS+1)
51 
52 #define NOP -1
53 
54 /*
55  * This define is used to represent *both* the accumulator and
56  * x register in use-def computations.
57  * Currently, the use-def code assumes only one definition per instruction.
58  */
59 #define AX_ATOM N_ATOMS
60 
61 /*
62  * A flag to indicate that further optimization is needed.
63  * Iterative passes are continued until a given pass yields no
64  * branch movement.
65  */
66 static int done;
67 
68 /*
69  * A block is marked if and only if its mark equals the current mark.
70  * Rather than traverse the code array, marking each item, 'cur_mark' is
71  * incremented.  This automatically makes each element unmarked.
72  */
73 static int cur_mark;
74 #define isMarked(p) ((p)->mark == cur_mark)
75 #define unMarkAll() cur_mark += 1
76 #define Mark(p) ((p)->mark = cur_mark)
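
/*
 * For illustration, a minimal sketch of how these macros are used by
 * the traversal routines below (see find_levels_r()): bumping
 * 'cur_mark' unmarks every block at once, and a recursive walk then
 * marks each block as it is visited so no block is processed twice.
 *
 *	unMarkAll();		all blocks become unmarked
 *	if (!isMarked(b)) {
 *		Mark(b);	b->mark = cur_mark
 *		... visit b ...
 *	}
 */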
77 
78 static void opt_init(struct block *);
79 static void opt_cleanup(void);
80 
81 static void make_marks(struct block *);
82 static void mark_code(struct block *);
83 
84 static void intern_blocks(struct block *);
85 
86 static int eq_slist(struct slist *, struct slist *);
87 
88 static void find_levels_r(struct block *);
89 
90 static void find_levels(struct block *);
91 static void find_dom(struct block *);
92 static void propedom(struct edge *);
93 static void find_edom(struct block *);
94 static void find_closure(struct block *);
95 static int atomuse(struct stmt *);
96 static int atomdef(struct stmt *);
97 static void compute_local_ud(struct block *);
98 static void find_ud(struct block *);
99 static void init_val(void);
100 static int F(int, int, int);
101 static __inline void vstore(struct stmt *, int *, int, int);
102 static void opt_blk(struct block *, int);
103 static int use_conflict(struct block *, struct block *);
104 static void opt_j(struct edge *);
105 static void or_pullup(struct block *);
106 static void and_pullup(struct block *);
107 static void opt_blks(struct block *, int);
108 static __inline void link_inedge(struct edge *, struct block *);
109 static void find_inedges(struct block *);
110 static void opt_root(struct block **);
111 static void opt_loop(struct block *, int);
112 static void fold_op(struct stmt *, int, int);
113 static __inline struct slist *this_op(struct slist *);
114 static void opt_not(struct block *);
115 static void opt_peep(struct block *);
116 static void opt_stmt(struct stmt *, int[], int);
117 static void deadstmt(struct stmt *, struct stmt *[]);
118 static void opt_deadstores(struct block *);
122 static struct block *fold_edge(struct block *, struct edge *);
123 static __inline int eq_blk(struct block *, struct block *);
124 static int slength(struct slist *);
125 static int count_blocks(struct block *);
126 static void number_blks_r(struct block *);
127 static int count_stmts(struct block *);
128 static int convert_code_r(struct block *);
129 #ifdef BDEBUG
130 static void opt_dump(struct block *);
131 #endif
132 
133 static int n_blocks;
134 struct block **blocks;
135 static int n_edges;
136 struct edge **edges;
137 
138 /*
139  * A bit vector set representation of the dominators.
140  * We round up the set size to the next power of two.
141  */
142 static int nodewords;
143 static int edgewords;
144 struct block **levels;
145 bpf_u_int32 *space;
146 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
147 /*
148  * True if 'a' is in uset 'p'.
149  */
150 #define SET_MEMBER(p, a) \
151 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
152 
153 /*
154  * Add 'a' to uset p.
155  */
156 #define SET_INSERT(p, a) \
157 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
158 
159 /*
160  * Delete 'a' from uset p.
161  */
162 #define SET_DELETE(p, a) \
163 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
164 
165 /*
166  * a := a intersect b
167  */
168 #define SET_INTERSECT(a, b, n)\
169 {\
170 	register bpf_u_int32 *_x = a, *_y = b;\
171 	register int _n = n;\
172 	while (--_n >= 0) *_x++ &= *_y++;\
173 }
174 
175 /*
176  * a := a - b
177  */
178 #define SET_SUBTRACT(a, b, n)\
179 {\
180 	register bpf_u_int32 *_x = a, *_y = b;\
181 	register int _n = n;\
182 	while (--_n >= 0) *_x++ &=~ *_y++;\
183 }
184 
185 /*
186  * a := a union b
187  */
188 #define SET_UNION(a, b, n)\
189 {\
190 	register bpf_u_int32 *_x = a, *_y = b;\
191 	register int _n = n;\
192 	while (--_n >= 0) *_x++ |= *_y++;\
193 }
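
/*
 * A short example of how these sets are used below (a sketch): once
 * find_dom() has filled in the per-block 'dom' vectors, asking whether
 * block 'd' dominates block 'b' is a single constant-time bit probe;
 * or_pullup() and and_pullup() use exactly this idiom:
 *
 *	if (SET_MEMBER(b->dom, d->id))
 *		... every path from the root to b passes through d ...
 */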
194 
195 static uset all_dom_sets;
196 static uset all_closure_sets;
197 static uset all_edge_sets;
198 
199 #ifndef MAX
200 #define MAX(a,b) ((a)>(b)?(a):(b))
201 #endif
202 
203 static void
204 find_levels_r(b)
205 	struct block *b;
206 {
207 	int level;
208 
209 	if (isMarked(b))
210 		return;
211 
212 	Mark(b);
213 	b->link = 0;
214 
215 	if (JT(b)) {
216 		find_levels_r(JT(b));
217 		find_levels_r(JF(b));
218 		level = MAX(JT(b)->level, JF(b)->level) + 1;
219 	} else
220 		level = 0;
221 	b->level = level;
222 	b->link = levels[level];
223 	levels[level] = b;
224 }
225 
226 /*
227  * Level graph.  The levels go from 0 at the leaves to
228  * N_LEVELS at the root.  The levels[] array points to the
229  * first node of the level list, whose elements are linked
230  * with the 'link' field of the struct block.
231  */
232 static void
233 find_levels(root)
234 	struct block *root;
235 {
236 	memset((char *)levels, 0, n_blocks * sizeof(*levels));
237 	unMarkAll();
238 	find_levels_r(root);
239 }
240 
241 /*
242  * Find dominator relationships.
243  * Assumes graph has been leveled.
244  */
245 static void
246 find_dom(root)
247 	struct block *root;
248 {
249 	int i;
250 	struct block *b;
251 	bpf_u_int32 *x;
252 
253 	/*
254 	 * Initialize sets to contain all nodes.
255 	 */
256 	x = all_dom_sets;
257 	i = n_blocks * nodewords;
258 	while (--i >= 0)
259 		*x++ = ~0;
260 	/* Root starts off empty. */
261 	for (i = nodewords; --i >= 0;)
262 		root->dom[i] = 0;
263 
264 	/* root->level is the highest level number found. */
265 	for (i = root->level; i >= 0; --i) {
266 		for (b = levels[i]; b; b = b->link) {
267 			SET_INSERT(b->dom, b->id);
268 			if (JT(b) == 0)
269 				continue;
270 			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
271 			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
272 		}
273 	}
274 }
275 
276 static void
277 propedom(ep)
278 	struct edge *ep;
279 {
280 	SET_INSERT(ep->edom, ep->id);
281 	if (ep->succ) {
282 		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
283 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
284 	}
285 }
286 
287 /*
288  * Compute edge dominators.
289  * Assumes graph has been leveled and predecessors established.
290  */
291 static void
292 find_edom(root)
293 	struct block *root;
294 {
295 	int i;
296 	uset x;
297 	struct block *b;
298 
299 	x = all_edge_sets;
300 	for (i = n_edges * edgewords; --i >= 0; )
301 		x[i] = ~0;
302 
303 	/* root->level is the highest level number found. */
304 	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
305 	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
306 	for (i = root->level; i >= 0; --i) {
307 		for (b = levels[i]; b != 0; b = b->link) {
308 			propedom(&b->et);
309 			propedom(&b->ef);
310 		}
311 	}
312 }
313 
314 /*
315  * Find the backwards transitive closure of the flow graph.  These sets
316  * are backwards in the sense that we find the set of nodes that reach
317  * a given node, not the set of nodes that can be reached by a node.
318  *
319  * Assumes graph has been leveled.
320  */
321 static void
322 find_closure(root)
323 	struct block *root;
324 {
325 	int i;
326 	struct block *b;
327 
328 	/*
329 	 * Initialize sets to contain no nodes.
330 	 */
331 	memset((char *)all_closure_sets, 0,
332 	      n_blocks * nodewords * sizeof(*all_closure_sets));
333 
334 	/* root->level is the highest level number found. */
335 	for (i = root->level; i >= 0; --i) {
336 		for (b = levels[i]; b; b = b->link) {
337 			SET_INSERT(b->closure, b->id);
338 			if (JT(b) == 0)
339 				continue;
340 			SET_UNION(JT(b)->closure, b->closure, nodewords);
341 			SET_UNION(JF(b)->closure, b->closure, nodewords);
342 		}
343 	}
344 }
345 
346 /*
347  * Return the register number that is used by s.  If A and X are both
348  * used, return AX_ATOM.  If no register is used, return -1.
349  *
350  * The implementation should probably change to an array access.
351  */
352 static int
353 atomuse(s)
354 	struct stmt *s;
355 {
356 	register int c = s->code;
357 
358 	if (c == NOP)
359 		return -1;
360 
361 	switch (BPF_CLASS(c)) {
362 
363 	case BPF_RET:
364 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
365 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
366 
367 	case BPF_LD:
368 	case BPF_LDX:
369 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
370 			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
371 
372 	case BPF_ST:
373 		return A_ATOM;
374 
375 	case BPF_STX:
376 		return X_ATOM;
377 
378 	case BPF_JMP:
379 	case BPF_ALU:
380 		if (BPF_SRC(c) == BPF_X)
381 			return AX_ATOM;
382 		return A_ATOM;
383 
384 	case BPF_MISC:
385 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
386 	}
387 	abort();
388 	/* NOTREACHED */
389 }
390 
391 /*
392  * Return the register number that is defined by 's'.  We assume that
393  * a single stmt cannot define more than one register.  If no register
394  * is defined, return -1.
395  *
396  * The implementation should probably change to an array access.
397  */
398 static int
399 atomdef(s)
400 	struct stmt *s;
401 {
402 	if (s->code == NOP)
403 		return -1;
404 
405 	switch (BPF_CLASS(s->code)) {
406 
407 	case BPF_LD:
408 	case BPF_ALU:
409 		return A_ATOM;
410 
411 	case BPF_LDX:
412 		return X_ATOM;
413 
414 	case BPF_ST:
415 	case BPF_STX:
416 		return s->k;
417 
418 	case BPF_MISC:
419 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
420 	}
421 	return -1;
422 }
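
/*
 * A worked example of the two mappings above (a sketch with made-up
 * operands): for "add x" (code BPF_ALU|BPF_ADD|BPF_X), atomuse()
 * returns AX_ATOM since both registers are read, while atomdef()
 * returns A_ATOM since only the accumulator is written.  For
 * "st M[3]" (code BPF_ST with k == 3), atomuse() returns A_ATOM and
 * atomdef() returns 3, the scratch memory cell being stored into.
 */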
423 
424 static void
425 compute_local_ud(b)
426 	struct block *b;
427 {
428 	struct slist *s;
429 	atomset def = 0, use = 0, kill = 0;
430 	int atom;
431 
432 	for (s = b->stmts; s; s = s->next) {
433 		if (s->s.code == NOP)
434 			continue;
435 		atom = atomuse(&s->s);
436 		if (atom >= 0) {
437 			if (atom == AX_ATOM) {
438 				if (!ATOMELEM(def, X_ATOM))
439 					use |= ATOMMASK(X_ATOM);
440 				if (!ATOMELEM(def, A_ATOM))
441 					use |= ATOMMASK(A_ATOM);
442 			}
443 			else if (atom < N_ATOMS) {
444 				if (!ATOMELEM(def, atom))
445 					use |= ATOMMASK(atom);
446 			}
447 			else
448 				abort();
449 		}
450 		atom = atomdef(&s->s);
451 		if (atom >= 0) {
452 			if (!ATOMELEM(use, atom))
453 				kill |= ATOMMASK(atom);
454 			def |= ATOMMASK(atom);
455 		}
456 	}
457 	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
458 		use |= ATOMMASK(A_ATOM);
459 
460 	b->def = def;
461 	b->kill = kill;
462 	b->in_use = use;
463 }
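
/*
 * For example (a sketch): a block whose only statement is "ld [12]"
 * and whose branch is "jeq #0x800" ends up with
 *
 *	b->def == ATOMMASK(A_ATOM), b->kill == ATOMMASK(A_ATOM),
 *	b->in_use == 0
 *
 * since the load defines A before the branch reads it, whereas a
 * block with no statements that ends in the same branch reports
 * A_ATOM in 'in_use', because the comparison consumes an accumulator
 * value computed by a predecessor.
 */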
464 
465 /*
466  * Assume graph is already leveled.
467  */
468 static void
469 find_ud(root)
470 	struct block *root;
471 {
472 	int i, maxlevel;
473 	struct block *p;
474 
475 	/*
476 	 * root->level is the highest level number found;
477 	 * count down from there.
478 	 */
479 	maxlevel = root->level;
480 	for (i = maxlevel; i >= 0; --i)
481 		for (p = levels[i]; p; p = p->link) {
482 			compute_local_ud(p);
483 			p->out_use = 0;
484 		}
485 
486 	for (i = 1; i <= maxlevel; ++i) {
487 		for (p = levels[i]; p; p = p->link) {
488 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
489 			p->in_use |= p->out_use &~ p->kill;
490 		}
491 	}
492 }
493 
494 /*
495  * These data structures are used in a Cocke and Schwartz style
496  * value numbering scheme.  Since the flowgraph is acyclic,
497  * exit values can be propagated from a node's predecessors
498  * provided the value is uniquely defined.
499  */
500 struct valnode {
501 	int code;
502 	int v0, v1;
503 	int val;
504 	struct valnode *next;
505 };
506 
507 #define MODULUS 213
508 static struct valnode *hashtbl[MODULUS];
509 static int curval;
510 static int maxval;
511 
512 /* Integer constants mapped with the load immediate opcode. */
513 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
514 
515 struct vmapinfo {
516 	int is_const;
517 	bpf_int32 const_val;
518 };
519 
520 struct vmapinfo *vmap;
521 struct valnode *vnode_base;
522 struct valnode *next_vnode;
523 
524 static void
525 init_val()
526 {
527 	curval = 0;
528 	next_vnode = vnode_base;
529 	memset((char *)vmap, 0, maxval * sizeof(*vmap));
530 	memset((char *)hashtbl, 0, sizeof hashtbl);
531 }
532 
533 /* Because we really don't have an IR, this stuff is a little messy. */
534 static int
535 F(code, v0, v1)
536 	int code;
537 	int v0, v1;
538 {
539 	u_int hash;
540 	int val;
541 	struct valnode *p;
542 
543 	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
544 	hash %= MODULUS;
545 
546 	for (p = hashtbl[hash]; p; p = p->next)
547 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
548 			return p->val;
549 
550 	val = ++curval;
551 	if (BPF_MODE(code) == BPF_IMM &&
552 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
553 		vmap[val].const_val = v0;
554 		vmap[val].is_const = 1;
555 	}
556 	p = next_vnode++;
557 	p->val = val;
558 	p->code = code;
559 	p->v0 = v0;
560 	p->v1 = v1;
561 	p->next = hashtbl[hash];
562 	hashtbl[hash] = p;
563 
564 	return val;
565 }
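
/*
 * For illustration (a sketch): because F() hashes on the triple
 * (code, v0, v1) and returns an existing entry on a match, equivalent
 * expressions receive equal value numbers:
 *
 *	v1 = K(4);				first "ld #4"
 *	v2 = K(4);				same triple, so v2 == v1
 *	v3 = F(BPF_LD|BPF_ABS|BPF_W, 14, 0L);	a distinct value
 *
 * and since K() expands to a load immediate, vmap[v1].is_const is set
 * with vmap[v1].const_val == 4.
 */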
566 
567 static __inline void
568 vstore(s, valp, newval, alter)
569 	struct stmt *s;
570 	int *valp;
571 	int newval;
572 	int alter;
573 {
574 	if (alter && *valp == newval)
575 		s->code = NOP;
576 	else
577 		*valp = newval;
578 }
579 
580 static void
581 fold_op(s, v0, v1)
582 	struct stmt *s;
583 	int v0, v1;
584 {
585 	bpf_int32 a, b;
586 
587 	a = vmap[v0].const_val;
588 	b = vmap[v1].const_val;
589 
590 	switch (BPF_OP(s->code)) {
591 	case BPF_ADD:
592 		a += b;
593 		break;
594 
595 	case BPF_SUB:
596 		a -= b;
597 		break;
598 
599 	case BPF_MUL:
600 		a *= b;
601 		break;
602 
603 	case BPF_DIV:
604 		if (b == 0)
605 			bpf_error("division by zero");
606 		a /= b;
607 		break;
608 
609 	case BPF_AND:
610 		a &= b;
611 		break;
612 
613 	case BPF_OR:
614 		a |= b;
615 		break;
616 
617 	case BPF_LSH:
618 		a <<= b;
619 		break;
620 
621 	case BPF_RSH:
622 		a >>= b;
623 		break;
624 
625 	case BPF_NEG:
626 		a = -a;
627 		break;
628 
629 	default:
630 		abort();
631 	}
632 	s->k = a;
633 	s->code = BPF_LD|BPF_IMM;
634 	done = 0;
635 }
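
/*
 * A worked example of the folding above (a sketch): if the accumulator
 * is known to hold the constant 6 and the statement is "and #3"
 * (BPF_ALU|BPF_AND|BPF_K), opt_stmt() passes the two constant value
 * numbers to fold_op(), which rewrites the statement in place as
 * "ld #2" (6 & 3), i.e. s->code becomes BPF_LD|BPF_IMM and s->k
 * becomes 2, and clears 'done' so another optimization pass is run.
 */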
636 
637 static __inline struct slist *
638 this_op(s)
639 	struct slist *s;
640 {
641 	while (s != 0 && s->s.code == NOP)
642 		s = s->next;
643 	return s;
644 }
645 
646 static void
647 opt_not(b)
648 	struct block *b;
649 {
650 	struct block *tmp = JT(b);
651 
652 	JT(b) = JF(b);
653 	JF(b) = tmp;
654 }
655 
656 static void
657 opt_peep(b)
658 	struct block *b;
659 {
660 	struct slist *s;
661 	struct slist *next, *last;
662 	int val;
663 
664 	s = b->stmts;
665 	if (s == 0)
666 		return;
667 
668 	last = s;
669 	while (1) {
670 		s = this_op(s);
671 		if (s == 0)
672 			break;
673 		next = this_op(s->next);
674 		if (next == 0)
675 			break;
676 		last = next;
677 
678 		/*
679 		 * st  M[k]	-->	st  M[k]
680 		 * ldx M[k]		tax
681 		 */
682 		if (s->s.code == BPF_ST &&
683 		    next->s.code == (BPF_LDX|BPF_MEM) &&
684 		    s->s.k == next->s.k) {
685 			done = 0;
686 			next->s.code = BPF_MISC|BPF_TAX;
687 		}
688 		/*
689 		 * ld  #k	-->	ldx  #k
690 		 * tax			txa
691 		 */
692 		if (s->s.code == (BPF_LD|BPF_IMM) &&
693 		    next->s.code == (BPF_MISC|BPF_TAX)) {
694 			s->s.code = BPF_LDX|BPF_IMM;
695 			next->s.code = BPF_MISC|BPF_TXA;
696 			done = 0;
697 		}
698 		/*
699 		 * This is an ugly special case, but it happens
700 		 * when you say tcp[k] or udp[k] where k is a constant.
701 		 */
702 		if (s->s.code == (BPF_LD|BPF_IMM)) {
703 			struct slist *add, *tax, *ild;
704 
705 			/*
706 			 * Check that X isn't used on exit from this
707 			 * block (which the optimizer might cause).
708 			 * We know the code generator won't generate
709 			 * any local dependencies.
710 			 */
711 			if (ATOMELEM(b->out_use, X_ATOM))
712 				break;
713 
714 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
715 				add = next;
716 			else
717 				add = this_op(next->next);
718 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
719 				break;
720 
721 			tax = this_op(add->next);
722 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
723 				break;
724 
725 			ild = this_op(tax->next);
726 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
727 			    BPF_MODE(ild->s.code) != BPF_IND)
728 				break;
729 			/*
730 			 * XXX We need to check that X is not
731 			 * subsequently used.  We know we can eliminate the
732 			 * accumulator modifications since it is defined
733 			 * by the last stmt of this sequence.
734 			 *
735 			 * We want to turn this sequence:
736 			 *
737 			 * (004) ldi     #0x2		{s}
738 			 * (005) ldxms   [14]		{next}  -- optional
739 			 * (006) addx			{add}
740 			 * (007) tax			{tax}
741 			 * (008) ild     [x+0]		{ild}
742 			 *
743 			 * into this sequence:
744 			 *
745 			 * (004) nop
746 			 * (005) ldxms   [14]
747 			 * (006) nop
748 			 * (007) nop
749 			 * (008) ild     [x+2]
750 			 *
751 			 */
752 			ild->s.k += s->s.k;
753 			s->s.code = NOP;
754 			add->s.code = NOP;
755 			tax->s.code = NOP;
756 			done = 0;
757 		}
758 		s = next;
759 	}
760 	/*
761 	 * If we have a subtract to do a comparison, and the X register
762 	 * is a known constant, we can merge this value into the
763 	 * comparison.
764 	 */
765 	if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
766 	    !ATOMELEM(b->out_use, A_ATOM)) {
767 		val = b->val[X_ATOM];
768 		if (vmap[val].is_const) {
769 			int op;
770 
771 			b->s.k += vmap[val].const_val;
772 			op = BPF_OP(b->s.code);
773 			if (op == BPF_JGT || op == BPF_JGE) {
774 				struct block *t = JT(b);
775 				JT(b) = JF(b);
776 				JF(b) = t;
777 				b->s.k += 0x80000000;
778 			}
779 			last->s.code = NOP;
780 			done = 0;
781 		} else if (b->s.k == 0) {
782 			/*
783 			 * sub x  ->	nop
784 			 * j  #0	j  x
785 			 */
786 			last->s.code = NOP;
787 			b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
788 				BPF_X;
789 			done = 0;
790 		}
791 	}
792 	/*
793 	 * Likewise, a constant subtract can be simplified.
794 	 */
795 	else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
796 		 !ATOMELEM(b->out_use, A_ATOM)) {
797 		int op;
798 
799 		b->s.k += last->s.k;
800 		last->s.code = NOP;
801 		op = BPF_OP(b->s.code);
802 		if (op == BPF_JGT || op == BPF_JGE) {
803 			struct block *t = JT(b);
804 			JT(b) = JF(b);
805 			JF(b) = t;
806 			b->s.k += 0x80000000;
807 		}
808 		done = 0;
809 	}
810 	/*
811 	 * and #k	nop
812 	 * jeq #0  ->	jset #k
813 	 */
814 	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
815 	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
816 		b->s.k = last->s.k;
817 		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
818 		last->s.code = NOP;
819 		done = 0;
820 		opt_not(b);
821 	}
822 	/*
823 	 * If the accumulator is a known constant, we can compute the
824 	 * comparison result.
825 	 */
826 	val = b->val[A_ATOM];
827 	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
828 		bpf_int32 v = vmap[val].const_val;
829 		switch (BPF_OP(b->s.code)) {
830 
831 		case BPF_JEQ:
832 			v = v == b->s.k;
833 			break;
834 
835 		case BPF_JGT:
836 			v = (unsigned)v > b->s.k;
837 			break;
838 
839 		case BPF_JGE:
840 			v = (unsigned)v >= b->s.k;
841 			break;
842 
843 		case BPF_JSET:
844 			v &= b->s.k;
845 			break;
846 
847 		default:
848 			abort();
849 		}
850 		if (JF(b) != JT(b))
851 			done = 0;
852 		if (v)
853 			JF(b) = JT(b);
854 		else
855 			JT(b) = JF(b);
856 	}
857 }
858 
859 /*
860  * Compute the symbolic value of the expression of 's', and update
861  * anything it defines in the value table 'val'.  If 'alter' is true,
862  * do various optimizations.  This code would be cleaner if symbolic
863  * evaluation and code transformations weren't folded together.
864  */
865 static void
866 opt_stmt(s, val, alter)
867 	struct stmt *s;
868 	int val[];
869 	int alter;
870 {
871 	int op;
872 	int v;
873 
874 	switch (s->code) {
875 
876 	case BPF_LD|BPF_ABS|BPF_W:
877 	case BPF_LD|BPF_ABS|BPF_H:
878 	case BPF_LD|BPF_ABS|BPF_B:
879 		v = F(s->code, s->k, 0L);
880 		vstore(s, &val[A_ATOM], v, alter);
881 		break;
882 
883 	case BPF_LD|BPF_IND|BPF_W:
884 	case BPF_LD|BPF_IND|BPF_H:
885 	case BPF_LD|BPF_IND|BPF_B:
886 		v = val[X_ATOM];
887 		if (alter && vmap[v].is_const) {
888 			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
889 			s->k += vmap[v].const_val;
890 			v = F(s->code, s->k, 0L);
891 			done = 0;
892 		}
893 		else
894 			v = F(s->code, s->k, v);
895 		vstore(s, &val[A_ATOM], v, alter);
896 		break;
897 
898 	case BPF_LD|BPF_LEN:
899 		v = F(s->code, 0L, 0L);
900 		vstore(s, &val[A_ATOM], v, alter);
901 		break;
902 
903 	case BPF_LD|BPF_IMM:
904 		v = K(s->k);
905 		vstore(s, &val[A_ATOM], v, alter);
906 		break;
907 
908 	case BPF_LDX|BPF_IMM:
909 		v = K(s->k);
910 		vstore(s, &val[X_ATOM], v, alter);
911 		break;
912 
913 	case BPF_LDX|BPF_MSH|BPF_B:
914 		v = F(s->code, s->k, 0L);
915 		vstore(s, &val[X_ATOM], v, alter);
916 		break;
917 
918 	case BPF_ALU|BPF_NEG:
919 		if (alter && vmap[val[A_ATOM]].is_const) {
920 			s->code = BPF_LD|BPF_IMM;
921 			s->k = -vmap[val[A_ATOM]].const_val;
922 			val[A_ATOM] = K(s->k);
923 		}
924 		else
925 			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
926 		break;
927 
928 	case BPF_ALU|BPF_ADD|BPF_K:
929 	case BPF_ALU|BPF_SUB|BPF_K:
930 	case BPF_ALU|BPF_MUL|BPF_K:
931 	case BPF_ALU|BPF_DIV|BPF_K:
932 	case BPF_ALU|BPF_AND|BPF_K:
933 	case BPF_ALU|BPF_OR|BPF_K:
934 	case BPF_ALU|BPF_LSH|BPF_K:
935 	case BPF_ALU|BPF_RSH|BPF_K:
936 		op = BPF_OP(s->code);
937 		if (alter) {
938 			if (s->k == 0) {
939 				if (op == BPF_ADD || op == BPF_SUB ||
940 				    op == BPF_LSH || op == BPF_RSH ||
941 				    op == BPF_OR) {
942 					s->code = NOP;
943 					break;
944 				}
945 				if (op == BPF_MUL || op == BPF_AND) {
946 					s->code = BPF_LD|BPF_IMM;
947 					val[A_ATOM] = K(s->k);
948 					break;
949 				}
950 			}
951 			if (vmap[val[A_ATOM]].is_const) {
952 				fold_op(s, val[A_ATOM], K(s->k));
953 				val[A_ATOM] = K(s->k);
954 				break;
955 			}
956 		}
957 		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
958 		break;
959 
960 	case BPF_ALU|BPF_ADD|BPF_X:
961 	case BPF_ALU|BPF_SUB|BPF_X:
962 	case BPF_ALU|BPF_MUL|BPF_X:
963 	case BPF_ALU|BPF_DIV|BPF_X:
964 	case BPF_ALU|BPF_AND|BPF_X:
965 	case BPF_ALU|BPF_OR|BPF_X:
966 	case BPF_ALU|BPF_LSH|BPF_X:
967 	case BPF_ALU|BPF_RSH|BPF_X:
968 		op = BPF_OP(s->code);
969 		if (alter && vmap[val[X_ATOM]].is_const) {
970 			if (vmap[val[A_ATOM]].is_const) {
971 				fold_op(s, val[A_ATOM], val[X_ATOM]);
972 				val[A_ATOM] = K(s->k);
973 			}
974 			else {
975 				s->code = BPF_ALU|BPF_K|op;
976 				s->k = vmap[val[X_ATOM]].const_val;
977 				done = 0;
978 				val[A_ATOM] =
979 					F(s->code, val[A_ATOM], K(s->k));
980 			}
981 			break;
982 		}
983 		/*
984 		 * Check if we're doing something to an accumulator
985 		 * that is 0, and simplify.  This may not seem like
986 		 * much of a simplification but it could open up further
987 		 * optimizations.
988 		 * XXX We could also check for mul by 1, and -1, etc.
989 		 */
990 		if (alter && vmap[val[A_ATOM]].is_const
991 		    && vmap[val[A_ATOM]].const_val == 0) {
992 			if (op == BPF_ADD || op == BPF_OR ||
993 			    op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
994 				s->code = BPF_MISC|BPF_TXA;
995 				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
996 				break;
997 			}
998 			else if (op == BPF_MUL || op == BPF_DIV ||
999 				 op == BPF_AND) {
1000 				s->code = BPF_LD|BPF_IMM;
1001 				s->k = 0;
1002 				vstore(s, &val[A_ATOM], K(s->k), alter);
1003 				break;
1004 			}
1005 			else if (op == BPF_NEG) {
1006 				s->code = NOP;
1007 				break;
1008 			}
1009 		}
1010 		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1011 		break;
1012 
1013 	case BPF_MISC|BPF_TXA:
1014 		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1015 		break;
1016 
1017 	case BPF_LD|BPF_MEM:
1018 		v = val[s->k];
1019 		if (alter && vmap[v].is_const) {
1020 			s->code = BPF_LD|BPF_IMM;
1021 			s->k = vmap[v].const_val;
1022 			done = 0;
1023 		}
1024 		vstore(s, &val[A_ATOM], v, alter);
1025 		break;
1026 
1027 	case BPF_MISC|BPF_TAX:
1028 		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1029 		break;
1030 
1031 	case BPF_LDX|BPF_MEM:
1032 		v = val[s->k];
1033 		if (alter && vmap[v].is_const) {
1034 			s->code = BPF_LDX|BPF_IMM;
1035 			s->k = vmap[v].const_val;
1036 			done = 0;
1037 		}
1038 		vstore(s, &val[X_ATOM], v, alter);
1039 		break;
1040 
1041 	case BPF_ST:
1042 		vstore(s, &val[s->k], val[A_ATOM], alter);
1043 		break;
1044 
1045 	case BPF_STX:
1046 		vstore(s, &val[s->k], val[X_ATOM], alter);
1047 		break;
1048 	}
1049 }
1050 
1051 static void
1052 deadstmt(s, last)
1053 	register struct stmt *s;
1054 	register struct stmt *last[];
1055 {
1056 	register int atom;
1057 
1058 	atom = atomuse(s);
1059 	if (atom >= 0) {
1060 		if (atom == AX_ATOM) {
1061 			last[X_ATOM] = 0;
1062 			last[A_ATOM] = 0;
1063 		}
1064 		else
1065 			last[atom] = 0;
1066 	}
1067 	atom = atomdef(s);
1068 	if (atom >= 0) {
1069 		if (last[atom]) {
1070 			done = 0;
1071 			last[atom]->code = NOP;
1072 		}
1073 		last[atom] = s;
1074 	}
1075 }
1076 
1077 static void
1078 opt_deadstores(b)
1079 	register struct block *b;
1080 {
1081 	register struct slist *s;
1082 	register int atom;
1083 	struct stmt *last[N_ATOMS];
1084 
1085 	memset((char *)last, 0, sizeof last);
1086 
1087 	for (s = b->stmts; s != 0; s = s->next)
1088 		deadstmt(&s->s, last);
1089 	deadstmt(&b->s, last);
1090 
1091 	for (atom = 0; atom < N_ATOMS; ++atom)
1092 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1093 			last[atom]->code = NOP;
1094 			done = 0;
1095 		}
1096 }
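
/*
 * For example (a sketch): if a block contains "st M[1]", no later
 * statement in the block reads M[1], and atom 1 is not in b->out_use,
 * the store can have no observable effect, so the loop above turns it
 * into a NOP and clears 'done' to trigger another pass.
 */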
1097 
1098 static void
1099 opt_blk(b, do_stmts)
1100 	struct block *b;
1101 	int do_stmts;
1102 {
1103 	struct slist *s;
1104 	struct edge *p;
1105 	int i;
1106 	bpf_int32 aval;
1107 
1108 	/*
1109 	 * Initialize the atom values.
1110 	 * If we have no predecessors, everything is undefined.
1111 	 * Otherwise, we inherit our values from our predecessors.
1112 	 * If any register has an ambiguous value (i.e. control paths are
1113 	 * merging) give it the undefined value of 0.
1114 	 */
1115 	p = b->in_edges;
1116 	if (p == 0)
1117 		memset((char *)b->val, 0, sizeof(b->val));
1118 	else {
1119 		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1120 		while ((p = p->next) != NULL) {
1121 			for (i = 0; i < N_ATOMS; ++i)
1122 				if (b->val[i] != p->pred->val[i])
1123 					b->val[i] = 0;
1124 		}
1125 	}
1126 	aval = b->val[A_ATOM];
1127 	for (s = b->stmts; s; s = s->next)
1128 		opt_stmt(&s->s, b->val, do_stmts);
1129 
1130 	/*
1131 	 * This is a special case: if we don't use anything from this
1132 	 * block, and we load the accumulator with a value that is
1133 	 * already there, or if this block is a return,
1134 	 * eliminate all the statements.
1135 	 */
1136 	if (do_stmts &&
1137 	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
1138 	     BPF_CLASS(b->s.code) == BPF_RET)) {
1139 		if (b->stmts != 0) {
1140 			b->stmts = 0;
1141 			done = 0;
1142 		}
1143 	} else {
1144 		opt_peep(b);
1145 		opt_deadstores(b);
1146 	}
1147 	/*
1148 	 * Set up values for branch optimizer.
1149 	 */
1150 	if (BPF_SRC(b->s.code) == BPF_K)
1151 		b->oval = K(b->s.k);
1152 	else
1153 		b->oval = b->val[X_ATOM];
1154 	b->et.code = b->s.code;
1155 	b->ef.code = -b->s.code;
1156 }
1157 
1158 /*
1159  * Return true if any register that is used on exit from 'succ' has
1160  * an exit value that is different from the corresponding exit value
1161  * from 'b'.
1162  */
1163 static int
1164 use_conflict(b, succ)
1165 	struct block *b, *succ;
1166 {
1167 	int atom;
1168 	atomset use = succ->out_use;
1169 
1170 	if (use == 0)
1171 		return 0;
1172 
1173 	for (atom = 0; atom < N_ATOMS; ++atom)
1174 		if (ATOMELEM(use, atom))
1175 			if (b->val[atom] != succ->val[atom])
1176 				return 1;
1177 	return 0;
1178 }
1179 
1180 static struct block *
1181 fold_edge(child, ep)
1182 	struct block *child;
1183 	struct edge *ep;
1184 {
1185 	int sense;
1186 	int aval0, aval1, oval0, oval1;
1187 	int code = ep->code;
1188 
1189 	if (code < 0) {
1190 		code = -code;
1191 		sense = 0;
1192 	} else
1193 		sense = 1;
1194 
1195 	if (child->s.code != code)
1196 		return 0;
1197 
1198 	aval0 = child->val[A_ATOM];
1199 	oval0 = child->oval;
1200 	aval1 = ep->pred->val[A_ATOM];
1201 	oval1 = ep->pred->oval;
1202 
1203 	if (aval0 != aval1)
1204 		return 0;
1205 
1206 	if (oval0 == oval1)
1207 		/*
1208 		 * The operands are identical, so the
1209 		 * result is true if a true branch was
1210 		 * taken to get here, otherwise false.
1211 		 */
1212 		return sense ? JT(child) : JF(child);
1213 
1214 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1215 		/*
1216 		 * At this point, we only know the comparison if we
1217 		 * came down the true branch, and it was an equality
1218 		 * comparison with a constant.  We rely on the fact that
1219 		 * distinct constants have distinct value numbers.
1220 		 */
1221 		return JF(child);
1222 
1223 	return 0;
1224 }
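
/*
 * Example of the above (a sketch): suppose 'ep' is the true edge out
 * of a block that tested "jeq #0x800" and 'child' performs the same
 * test on the same accumulator value (equal value numbers for both A
 * and the operand).  The child's outcome is then already known to be
 * true, so fold_edge() returns JT(child) and the caller, opt_j(), can
 * retarget its edge past the redundant comparison.
 */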
1225 
1226 static void
1227 opt_j(ep)
1228 	struct edge *ep;
1229 {
1230 	register int i, k;
1231 	register struct block *target;
1232 
1233 	if (JT(ep->succ) == 0)
1234 		return;
1235 
1236 	if (JT(ep->succ) == JF(ep->succ)) {
1237 		/*
1238 		 * Common branch targets can be eliminated, provided
1239 		 * there is no data dependency.
1240 		 */
1241 		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1242 			done = 0;
1243 			ep->succ = JT(ep->succ);
1244 		}
1245 	}
1246 	/*
1247 	 * For each edge dominator that matches the successor of this
1248 	 * edge, promote the edge successor to its grandchild.
1249 	 *
1250 	 * XXX We violate the set abstraction here in favor of a reasonably
1251 	 * efficient loop.
1252 	 */
1253  top:
1254 	for (i = 0; i < edgewords; ++i) {
1255 		register bpf_u_int32 x = ep->edom[i];
1256 
1257 		while (x != 0) {
1258 			k = ffs(x) - 1;
1259 			x &=~ (1 << k);
1260 			k += i * BITS_PER_WORD;
1261 
1262 			target = fold_edge(ep->succ, edges[k]);
1263 			/*
1264 			 * Check that there is no data dependency between
1265 			 * nodes that will be violated if we move the edge.
1266 			 */
1267 			if (target != 0 && !use_conflict(ep->pred, target)) {
1268 				done = 0;
1269 				ep->succ = target;
1270 				if (JT(target) != 0)
1271 					/*
1272 					 * Start over unless we hit a leaf.
1273 					 */
1274 					goto top;
1275 				return;
1276 			}
1277 		}
1278 	}
1279 }
1280 
1281 
1282 static void
1283 or_pullup(b)
1284 	struct block *b;
1285 {
1286 	int val, at_top;
1287 	struct block *pull;
1288 	struct block **diffp, **samep;
1289 	struct edge *ep;
1290 
1291 	ep = b->in_edges;
1292 	if (ep == 0)
1293 		return;
1294 
1295 	/*
1296 	 * Make sure each predecessor loads the same value.
1297 	 * XXX why?
1298 	 */
1299 	val = ep->pred->val[A_ATOM];
1300 	for (ep = ep->next; ep != 0; ep = ep->next)
1301 		if (val != ep->pred->val[A_ATOM])
1302 			return;
1303 
1304 	if (JT(b->in_edges->pred) == b)
1305 		diffp = &JT(b->in_edges->pred);
1306 	else
1307 		diffp = &JF(b->in_edges->pred);
1308 
1309 	at_top = 1;
1310 	while (1) {
1311 		if (*diffp == 0)
1312 			return;
1313 
1314 		if (JT(*diffp) != JT(b))
1315 			return;
1316 
1317 		if (!SET_MEMBER((*diffp)->dom, b->id))
1318 			return;
1319 
1320 		if ((*diffp)->val[A_ATOM] != val)
1321 			break;
1322 
1323 		diffp = &JF(*diffp);
1324 		at_top = 0;
1325 	}
1326 	samep = &JF(*diffp);
1327 	while (1) {
1328 		if (*samep == 0)
1329 			return;
1330 
1331 		if (JT(*samep) != JT(b))
1332 			return;
1333 
1334 		if (!SET_MEMBER((*samep)->dom, b->id))
1335 			return;
1336 
1337 		if ((*samep)->val[A_ATOM] == val)
1338 			break;
1339 
1340 		/* XXX Need to check that there are no data dependencies
1341 		   between *diffp and *samep.  Currently, the code generator
1342 		   will not produce such dependencies. */
1343 		samep = &JF(*samep);
1344 	}
1345 #ifdef notdef
1346 	/* XXX This doesn't cover everything. */
1347 	for (i = 0; i < N_ATOMS; ++i)
1348 		if ((*samep)->val[i] != pred->val[i])
1349 			return;
1350 #endif
1351 	/* Pull up the node. */
1352 	pull = *samep;
1353 	*samep = JF(pull);
1354 	JF(pull) = *diffp;
1355 
1356 	/*
1357 	 * At the top of the chain, each predecessor needs to point at the
1358 	 * pulled up node.  Inside the chain, there is only one predecessor
1359 	 * to worry about.
1360 	 */
1361 	if (at_top) {
1362 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1363 			if (JT(ep->pred) == b)
1364 				JT(ep->pred) = pull;
1365 			else
1366 				JF(ep->pred) = pull;
1367 		}
1368 	}
1369 	else
1370 		*diffp = pull;
1371 
1372 	done = 0;
1373 }
1374 
1375 static void
1376 and_pullup(b)
1377 	struct block *b;
1378 {
1379 	int val, at_top;
1380 	struct block *pull;
1381 	struct block **diffp, **samep;
1382 	struct edge *ep;
1383 
1384 	ep = b->in_edges;
1385 	if (ep == 0)
1386 		return;
1387 
1388 	/*
1389 	 * Make sure each predecessor loads the same value.
1390 	 */
1391 	val = ep->pred->val[A_ATOM];
1392 	for (ep = ep->next; ep != 0; ep = ep->next)
1393 		if (val != ep->pred->val[A_ATOM])
1394 			return;
1395 
1396 	if (JT(b->in_edges->pred) == b)
1397 		diffp = &JT(b->in_edges->pred);
1398 	else
1399 		diffp = &JF(b->in_edges->pred);
1400 
1401 	at_top = 1;
1402 	while (1) {
1403 		if (*diffp == 0)
1404 			return;
1405 
1406 		if (JF(*diffp) != JF(b))
1407 			return;
1408 
1409 		if (!SET_MEMBER((*diffp)->dom, b->id))
1410 			return;
1411 
1412 		if ((*diffp)->val[A_ATOM] != val)
1413 			break;
1414 
1415 		diffp = &JT(*diffp);
1416 		at_top = 0;
1417 	}
1418 	samep = &JT(*diffp);
1419 	while (1) {
1420 		if (*samep == 0)
1421 			return;
1422 
1423 		if (JF(*samep) != JF(b))
1424 			return;
1425 
1426 		if (!SET_MEMBER((*samep)->dom, b->id))
1427 			return;
1428 
1429 		if ((*samep)->val[A_ATOM] == val)
1430 			break;
1431 
1432 		/* XXX Need to check that there are no data dependencies
1433 		   between diffp and samep.  Currently, the code generator
1434 		   will not produce such dependencies. */
1435 		samep = &JT(*samep);
1436 	}
1437 #ifdef notdef
1438 	/* XXX This doesn't cover everything. */
1439 	for (i = 0; i < N_ATOMS; ++i)
1440 		if ((*samep)->val[i] != pred->val[i])
1441 			return;
1442 #endif
1443 	/* Pull up the node. */
1444 	pull = *samep;
1445 	*samep = JT(pull);
1446 	JT(pull) = *diffp;
1447 
1448 	/*
1449 	 * At the top of the chain, each predecessor needs to point at the
1450 	 * pulled up node.  Inside the chain, there is only one predecessor
1451 	 * to worry about.
1452 	 */
1453 	if (at_top) {
1454 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1455 			if (JT(ep->pred) == b)
1456 				JT(ep->pred) = pull;
1457 			else
1458 				JF(ep->pred) = pull;
1459 		}
1460 	}
1461 	else
1462 		*diffp = pull;
1463 
1464 	done = 0;
1465 }
1466 
1467 static void
1468 opt_blks(root, do_stmts)
1469 	struct block *root;
1470 	int do_stmts;
1471 {
1472 	int i, maxlevel;
1473 	struct block *p;
1474 
1475 	init_val();
1476 	maxlevel = root->level;
1477 	for (i = maxlevel; i >= 0; --i)
1478 		for (p = levels[i]; p; p = p->link)
1479 			opt_blk(p, do_stmts);
1480 
1481 	if (do_stmts)
1482 		/*
1483 		 * No point trying to move branches; it can't possibly
1484 		 * make a difference at this point.
1485 		 */
1486 		return;
1487 
1488 	for (i = 1; i <= maxlevel; ++i) {
1489 		for (p = levels[i]; p; p = p->link) {
1490 			opt_j(&p->et);
1491 			opt_j(&p->ef);
1492 		}
1493 	}
1494 	for (i = 1; i <= maxlevel; ++i) {
1495 		for (p = levels[i]; p; p = p->link) {
1496 			or_pullup(p);
1497 			and_pullup(p);
1498 		}
1499 	}
1500 }
1501 
1502 static __inline void
1503 link_inedge(parent, child)
1504 	struct edge *parent;
1505 	struct block *child;
1506 {
1507 	parent->next = child->in_edges;
1508 	child->in_edges = parent;
1509 }
1510 
1511 static void
1512 find_inedges(root)
1513 	struct block *root;
1514 {
1515 	int i;
1516 	struct block *b;
1517 
1518 	for (i = 0; i < n_blocks; ++i)
1519 		blocks[i]->in_edges = 0;
1520 
1521 	/*
1522 	 * Traverse the graph, adding each edge to the predecessor
1523 	 * list of its successors.  Skip the leaves (i.e. level 0).
1524 	 */
1525 	for (i = root->level; i > 0; --i) {
1526 		for (b = levels[i]; b != 0; b = b->link) {
1527 			link_inedge(&b->et, JT(b));
1528 			link_inedge(&b->ef, JF(b));
1529 		}
1530 	}
1531 }
1532 
1533 static void
1534 opt_root(b)
1535 	struct block **b;
1536 {
1537 	struct slist *tmp, *s;
1538 
1539 	s = (*b)->stmts;
1540 	(*b)->stmts = 0;
1541 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1542 		*b = JT(*b);
1543 
1544 	tmp = (*b)->stmts;
1545 	if (tmp != 0)
1546 		sappend(s, tmp);
1547 	(*b)->stmts = s;
1548 
1549 	/*
1550 	 * If the root node is a return, then there is no
1551 	 * point executing any statements (since the bpf machine
1552 	 * has no side effects).
1553 	 */
1554 	if (BPF_CLASS((*b)->s.code) == BPF_RET)
1555 		(*b)->stmts = 0;
1556 }
1557 
1558 static void
1559 opt_loop(root, do_stmts)
1560 	struct block *root;
1561 	int do_stmts;
1562 {
1563 
1564 #ifdef BDEBUG
1565 	if (dflag > 1)
1566 		opt_dump(root);
1567 #endif
1568 	do {
1569 		done = 1;
1570 		find_levels(root);
1571 		find_dom(root);
1572 		find_closure(root);
1573 		find_inedges(root);
1574 		find_ud(root);
1575 		find_edom(root);
1576 		opt_blks(root, do_stmts);
1577 #ifdef BDEBUG
1578 		if (dflag > 1)
1579 			opt_dump(root);
1580 #endif
1581 	} while (!done);
1582 }
1583 
1584 /*
1585  * Optimize the filter code in its dag representation.
1586  */
1587 void
1588 bpf_optimize(rootp)
1589 	struct block **rootp;
1590 {
1591 	struct block *root;
1592 
1593 	root = *rootp;
1594 
1595 	opt_init(root);
1596 	opt_loop(root, 0);
1597 	opt_loop(root, 1);
1598 	intern_blocks(root);
1599 	opt_root(rootp);
1600 	opt_cleanup();
1601 }
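
/*
 * For context, a rough sketch of how a caller drives this module
 * (modelled on pcap_compile(); the surrounding caller code is an
 * assumption, but the two calls below are the entry points defined in
 * this file):
 *
 *	struct block *root = ...;	DAG built by the code generator
 *	struct bpf_insn *insns;
 *	int len;
 *
 *	bpf_optimize(&root);		optional optimization pass
 *	insns = icode_to_fcode(root, &len);
 */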
1602 
1603 static void
1604 make_marks(p)
1605 	struct block *p;
1606 {
1607 	if (!isMarked(p)) {
1608 		Mark(p);
1609 		if (BPF_CLASS(p->s.code) != BPF_RET) {
1610 			make_marks(JT(p));
1611 			make_marks(JF(p));
1612 		}
1613 	}
1614 }
1615 
1616 /*
1617  * Mark code array such that isMarked(i) is true
1618  * only for nodes that are alive.
1619  */
1620 static void
1621 mark_code(p)
1622 	struct block *p;
1623 {
1624 	cur_mark += 1;
1625 	make_marks(p);
1626 }
1627 
1628 /*
1629  * True iff the two stmt lists load the same value from the packet into
1630  * the accumulator.
1631  */
1632 static int
1633 eq_slist(x, y)
1634 	struct slist *x, *y;
1635 {
1636 	while (1) {
1637 		while (x && x->s.code == NOP)
1638 			x = x->next;
1639 		while (y && y->s.code == NOP)
1640 			y = y->next;
1641 		if (x == 0)
1642 			return y == 0;
1643 		if (y == 0)
1644 			return x == 0;
1645 		if (x->s.code != y->s.code || x->s.k != y->s.k)
1646 			return 0;
1647 		x = x->next;
1648 		y = y->next;
1649 	}
1650 }
1651 
1652 static __inline int
1653 eq_blk(b0, b1)
1654 	struct block *b0, *b1;
1655 {
1656 	if (b0->s.code == b1->s.code &&
1657 	    b0->s.k == b1->s.k &&
1658 	    b0->et.succ == b1->et.succ &&
1659 	    b0->ef.succ == b1->ef.succ)
1660 		return eq_slist(b0->stmts, b1->stmts);
1661 	return 0;
1662 }
1663 
1664 static void
1665 intern_blocks(root)
1666 	struct block *root;
1667 {
1668 	struct block *p;
1669 	int i, j;
1670 	int done;
1671  top:
1672 	done = 1;
1673 	for (i = 0; i < n_blocks; ++i)
1674 		blocks[i]->link = 0;
1675 
1676 	mark_code(root);
1677 
1678 	for (i = n_blocks - 1; --i >= 0; ) {
1679 		if (!isMarked(blocks[i]))
1680 			continue;
1681 		for (j = i + 1; j < n_blocks; ++j) {
1682 			if (!isMarked(blocks[j]))
1683 				continue;
1684 			if (eq_blk(blocks[i], blocks[j])) {
1685 				blocks[i]->link = blocks[j]->link ?
1686 					blocks[j]->link : blocks[j];
1687 				break;
1688 			}
1689 		}
1690 	}
1691 	for (i = 0; i < n_blocks; ++i) {
1692 		p = blocks[i];
1693 		if (JT(p) == 0)
1694 			continue;
1695 		if (JT(p)->link) {
1696 			done = 0;
1697 			JT(p) = JT(p)->link;
1698 		}
1699 		if (JF(p)->link) {
1700 			done = 0;
1701 			JF(p) = JF(p)->link;
1702 		}
1703 	}
1704 	if (!done)
1705 		goto top;
1706 }
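
/*
 * For example (a sketch): two reachable blocks that both consist of
 * "ret #68" with no surviving statements compare equal under eq_blk(),
 * so one is recorded as the other's 'link' and every JT()/JF() pointer
 * into the duplicate is redirected, leaving a single copy of the block
 * reachable in the final program.
 */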
1707 
1708 static void
1709 opt_cleanup()
1710 {
1711 	free((void *)vnode_base);
1712 	free((void *)vmap);
1713 	free((void *)edges);
1714 	free((void *)space);
1715 	free((void *)levels);
1716 	free((void *)blocks);
1717 }
1718 
1719 /*
1720  * Return the number of stmts in 's'.
1721  */
1722 static int
1723 slength(s)
1724 	struct slist *s;
1725 {
1726 	int n = 0;
1727 
1728 	for (; s; s = s->next)
1729 		if (s->s.code != NOP)
1730 			++n;
1731 	return n;
1732 }
1733 
1734 /*
1735  * Return the number of nodes reachable by 'p'.
1736  * All nodes should be initially unmarked.
1737  */
1738 static int
1739 count_blocks(p)
1740 	struct block *p;
1741 {
1742 	if (p == 0 || isMarked(p))
1743 		return 0;
1744 	Mark(p);
1745 	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1746 }
1747 
1748 /*
1749  * Do a depth first search on the flow graph, numbering the
1750  * basic blocks, and entering them into the 'blocks' array.
1751  */
1752 static void
1753 number_blks_r(p)
1754 	struct block *p;
1755 {
1756 	int n;
1757 
1758 	if (p == 0 || isMarked(p))
1759 		return;
1760 
1761 	Mark(p);
1762 	n = n_blocks++;
1763 	p->id = n;
1764 	blocks[n] = p;
1765 
1766 	number_blks_r(JT(p));
1767 	number_blks_r(JF(p));
1768 }
1769 
1770 /*
1771  * Return the number of stmts in the flowgraph reachable by 'p'.
1772  * The nodes should be unmarked before calling.
1773  */
1774 static int
1775 count_stmts(p)
1776 	struct block *p;
1777 {
1778 	int n;
1779 
1780 	if (p == 0 || isMarked(p))
1781 		return 0;
1782 	Mark(p);
1783 	n = count_stmts(JT(p)) + count_stmts(JF(p));
1784 	return slength(p->stmts) + n + 1;
1785 }
1786 
1787 /*
1788  * Allocate memory.  All allocation is done before optimization
1789  * is begun.  A linear bound on the size of all data structures is computed
1790  * from the total number of blocks and/or statements.
1791  */
1792 static void
1793 opt_init(root)
1794 	struct block *root;
1795 {
1796 	bpf_u_int32 *p;
1797 	int i, n, max_stmts;
1798 
1799 	/*
1800 	 * First, count the blocks, so we can malloc an array to map
1801 	 * block number to block.  Then, put the blocks into the array.
1802 	 */
1803 	unMarkAll();
1804 	n = count_blocks(root);
1805 	blocks = (struct block **)malloc(n * sizeof(*blocks));
1806 	unMarkAll();
1807 	n_blocks = 0;
1808 	number_blks_r(root);
1809 
1810 	n_edges = 2 * n_blocks;
1811 	edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1812 
1813 	/*
1814 	 * The number of levels is bounded by the number of nodes.
1815 	 */
1816 	levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1817 
1818 	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1819 	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1820 
1821 	/* XXX */
1822 	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1823 				 + n_edges * edgewords * sizeof(*space));
1824 	p = space;
1825 	all_dom_sets = p;
1826 	for (i = 0; i < n; ++i) {
1827 		blocks[i]->dom = p;
1828 		p += nodewords;
1829 	}
1830 	all_closure_sets = p;
1831 	for (i = 0; i < n; ++i) {
1832 		blocks[i]->closure = p;
1833 		p += nodewords;
1834 	}
1835 	all_edge_sets = p;
1836 	for (i = 0; i < n; ++i) {
1837 		register struct block *b = blocks[i];
1838 
1839 		b->et.edom = p;
1840 		p += edgewords;
1841 		b->ef.edom = p;
1842 		p += edgewords;
1843 		b->et.id = i;
1844 		edges[i] = &b->et;
1845 		b->ef.id = n_blocks + i;
1846 		edges[n_blocks + i] = &b->ef;
1847 		b->et.pred = b;
1848 		b->ef.pred = b;
1849 	}
1850 	max_stmts = 0;
1851 	for (i = 0; i < n; ++i)
1852 		max_stmts += slength(blocks[i]->stmts) + 1;
1853 	/*
1854 	 * We allocate at most 3 value numbers per statement,
1855 	 * so this is an upper bound on the number of valnodes
1856 	 * we'll need.
1857 	 */
1858 	maxval = 3 * max_stmts;
1859 	vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1860 	vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1861 }
1862 
1863 /*
1864  * Some pointers used to convert the basic block form of the code
1865  * into the array form that BPF requires.  'fstart' will point to
1866  * the malloc'd array while 'ftail' is used during the recursive traversal.
1867  */
1868 static struct bpf_insn *fstart;
1869 static struct bpf_insn *ftail;
1870 
1871 #ifdef BDEBUG
1872 int bids[1000];
1873 #endif
1874 
1875 /*
1876  * Returns true if successful.  Returns false if a branch has
1877  * an offset that is too large.  If so, we have marked that
1878  * branch so that on a subsequent iteration, it will be treated
1879  * properly.
1880  */
1881 static int
1882 convert_code_r(p)
1883 	struct block *p;
1884 {
1885 	struct bpf_insn *dst;
1886 	struct slist *src;
1887 	int slen;
1888 	u_int off;
1889 	int extrajmps;		/* number of extra jumps inserted */
1890 
1891 	if (p == 0 || isMarked(p))
1892 		return (1);
1893 	Mark(p);
1894 
1895 	if (convert_code_r(JF(p)) == 0)
1896 		return (0);
1897 	if (convert_code_r(JT(p)) == 0)
1898 		return (0);
1899 
1900 	slen = slength(p->stmts);
1901 	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1902 		/* inflate length by any extra jumps */
1903 
1904 	p->offset = dst - fstart;
1905 
1906 	for (src = p->stmts; src; src = src->next) {
1907 		if (src->s.code == NOP)
1908 			continue;
1909 		dst->code = (u_short)src->s.code;
1910 		dst->k = src->s.k;
1911 		++dst;
1912 	}
1913 #ifdef BDEBUG
1914 	bids[dst - fstart] = p->id + 1;
1915 #endif
1916 	dst->code = (u_short)p->s.code;
1917 	dst->k = p->s.k;
1918 	if (JT(p)) {
1919 		extrajmps = 0;
1920 		off = JT(p)->offset - (p->offset + slen) - 1;
1921 		if (off >= 256) {
1922 		    /* offset too large for branch, must add a jump */
1923 		    if (p->longjt == 0) {
1924 		    	/* mark this instruction and retry */
1925 			p->longjt++;
1926 			return(0);
1927 		    }
1928 		    /* branch if T to following jump */
1929 		    dst->jt = extrajmps;
1930 		    extrajmps++;
1931 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
1932 		    dst[extrajmps].k = off - extrajmps;
1933 		}
1934 		else
1935 		    dst->jt = off;
1936 		off = JF(p)->offset - (p->offset + slen) - 1;
1937 		if (off >= 256) {
1938 		    /* offset too large for branch, must add a jump */
1939 		    if (p->longjf == 0) {
1940 		    	/* mark this instruction and retry */
1941 			p->longjf++;
1942 			return(0);
1943 		    }
1944 		    /* branch if F to following jump */
1945 		    /* if two jumps are inserted, F goes to second one */
1946 		    dst->jf = extrajmps;
1947 		    extrajmps++;
1948 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
1949 		    dst[extrajmps].k = off - extrajmps;
1950 		}
1951 		else
1952 		    dst->jf = off;
1953 	}
1954 	return (1);
1955 }
1956 
1957 
1958 /*
1959  * Convert flowgraph intermediate representation to the
1960  * BPF array representation.  Set *lenp to the number of instructions.
1961  */
1962 struct bpf_insn *
1963 icode_to_fcode(root, lenp)
1964 	struct block *root;
1965 	int *lenp;
1966 {
1967 	int n;
1968 	struct bpf_insn *fp;
1969 
1970 	/*
1971 	 * Loop doing convert_code_r() until no branches remain
1972 	 * with too-large offsets.
1973 	 */
1974 	while (1) {
1975 	    unMarkAll();
1976 	    n = *lenp = count_stmts(root);
1977 
1978 	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
1979 	    memset((char *)fp, 0, sizeof(*fp) * n);
1980 	    fstart = fp;
1981 	    ftail = fp + n;
1982 
1983 	    unMarkAll();
1984 	    if (convert_code_r(root))
1985 		break;
1986 	    free(fp);
1987 	}
1988 
1989 	return fp;
1990 }
1991 
1992 #ifdef BDEBUG
1993 static void
1994 opt_dump(root)
1995 	struct block *root;
1996 {
1997 	struct bpf_program f;
1998 
1999 	memset(bids, 0, sizeof bids);
2000 	f.bf_insns = icode_to_fcode(root, &f.bf_len);
2001 	bpf_dump(&f, 1);
2002 	putchar('\n');
2003 	free((char *)f.bf_insns);
2004 }
2005 #endif
2006