xref: /netbsd-src/external/bsd/libpcap/dist/optimize.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: optimize.c,v 1.9 2017/01/24 22:29:28 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that: (1) source code distributions
9  * retain the above copyright notice and this paragraph in its entirety, (2)
10  * distributions including binary code include the above copyright notice and
11  * this paragraph in its entirety in the documentation or other materials
12  * provided with the distribution, and (3) all advertising materials mentioning
13  * features or use of this software display the following acknowledgement:
14  * ``This product includes software developed by the University of California,
15  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16  * the University nor the names of its contributors may be used to endorse
17  * or promote products derived from this software without specific prior
18  * written permission.
19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22  *
23  *  Optimization module for tcpdump intermediate representation.
24  */
25 
26 #include <sys/cdefs.h>
27 __RCSID("$NetBSD: optimize.c,v 1.9 2017/01/24 22:29:28 christos Exp $");
28 
29 #ifdef HAVE_CONFIG_H
30 #include "config.h"
31 #endif
32 
33 #ifdef _WIN32
34 #include <pcap-stdinc.h>
35 #else /* _WIN32 */
36 #if HAVE_INTTYPES_H
37 #include <inttypes.h>
38 #elif HAVE_STDINT_H
39 #include <stdint.h>
40 #endif
41 #ifdef HAVE_SYS_BITYPES_H
42 #include <sys/bitypes.h>
43 #endif
44 #include <sys/types.h>
45 #endif /* _WIN32 */
46 
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <memory.h>
50 #include <string.h>
51 
52 #include <errno.h>
53 
54 #include "pcap-int.h"
55 
56 #include "gencode.h"
57 
58 #ifdef HAVE_OS_PROTO_H
59 #include "os-proto.h"
60 #endif
61 
62 #ifdef BDEBUG
63 int pcap_optimizer_debug;
64 #endif
65 
66 #if defined(MSDOS) && !defined(__DJGPP__)
67 extern int _w32_ffs (int mask);
68 #define ffs _w32_ffs
69 #endif
70 
71 /*
72  * So is the check for _MSC_VER done because MinGW has this?
73  */
74 #if defined(_WIN32) && defined (_MSC_VER)
75 /*
76  * ffs -- vax ffs instruction
77  *
78  * XXX - with versions of VS that have it, use _BitScanForward()?
79  */
80 static int
81 ffs(int mask)
82 {
83 	int bit;
84 
85 	if (mask == 0)
86 		return(0);
87 	for (bit = 1; !(mask & 1); bit++)
88 		mask >>= 1;
89 	return(bit);
90 }
91 #endif
92 
93 /*
94  * Represents a deleted instruction.
95  */
96 #define NOP -1
97 
98 /*
99  * Register numbers for use-def values.
100  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
101  * location.  A_ATOM is the accumulator and X_ATOM is the index
102  * register.
103  */
104 #define A_ATOM BPF_MEMWORDS
105 #define X_ATOM (BPF_MEMWORDS+1)
106 
107 /*
108  * This define is used to represent *both* the accumulator and
109  * x register in use-def computations.
110  * Currently, the use-def code assumes only one definition per instruction.
111  */
112 #define AX_ATOM N_ATOMS
113 
114 /*
115  * These data structures are used in a Cocke and Shwarz style
116  * value numbering scheme.  Since the flowgraph is acyclic,
117  * exit values can be propagated from a node's predecessors
118  * provided it is uniquely defined.
119  */
120 struct valnode {
121 	int code;
122 	int v0, v1;
123 	int val;
124 	struct valnode *next;
125 };
126 
127 /* Integer constants mapped with the load immediate opcode. */
128 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
129 
130 struct vmapinfo {
131 	int is_const;
132 	bpf_int32 const_val;
133 };
134 
135 struct _opt_state {
136 	/*
137 	 * A flag to indicate that further optimization is needed.
138 	 * Iterative passes are continued until a given pass yields no
139 	 * branch movement.
140 	 */
141 	int done;
142 
143 	int n_blocks;
144 	struct block **blocks;
145 	int n_edges;
146 	struct edge **edges;
147 
148 	/*
149 	 * A bit vector set representation of the dominators.
150 	 * We round up the set size to the next power of two.
151 	 */
152 	int nodewords;
153 	int edgewords;
154 	struct block **levels;
155 	bpf_u_int32 *space;
156 
157 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
158 /*
159  * True if a is in uset {p}
160  */
161 #define SET_MEMBER(p, a) \
162 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
163 
164 /*
165  * Add 'a' to uset p.
166  */
167 #define SET_INSERT(p, a) \
168 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
169 
170 /*
171  * Delete 'a' from uset p.
172  */
173 #define SET_DELETE(p, a) \
174 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
175 
176 /*
177  * a := a intersect b
178  */
179 #define SET_INTERSECT(a, b, n)\
180 {\
181 	register bpf_u_int32 *_x = a, *_y = b;\
182 	register int _n = n;\
183 	while (--_n >= 0) *_x++ &= *_y++;\
184 }
185 
186 /*
187  * a := a - b
188  */
189 #define SET_SUBTRACT(a, b, n)\
190 {\
191 	register bpf_u_int32 *_x = a, *_y = b;\
192 	register int _n = n;\
193 	while (--_n >= 0) *_x++ &=~ *_y++;\
194 }
195 
196 /*
197  * a := a union b
198  */
199 #define SET_UNION(a, b, n)\
200 {\
201 	register bpf_u_int32 *_x = a, *_y = b;\
202 	register int _n = n;\
203 	while (--_n >= 0) *_x++ |= *_y++;\
204 }
205 
206 	uset all_dom_sets;
207 	uset all_closure_sets;
208 	uset all_edge_sets;
209 
210 #define MODULUS 213
211 	struct valnode *hashtbl[MODULUS];
212 	int curval;
213 	int maxval;
214 
215 	struct vmapinfo *vmap;
216 	struct valnode *vnode_base;
217 	struct valnode *next_vnode;
218 };
219 
220 typedef struct {
221 	/*
222 	 * Some pointers used to convert the basic block form of the code,
223 	 * into the array form that BPF requires.  'fstart' will point to
224 	 * the malloc'd array while 'ftail' is used during the recursive
225 	 * traversal.
226 	 */
227 	struct bpf_insn *fstart;
228 	struct bpf_insn *ftail;
229 } conv_state_t;
230 
231 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
232 static void opt_cleanup(opt_state_t *);
233 
234 static void intern_blocks(opt_state_t *, struct icode *);
235 
236 static void find_inedges(opt_state_t *, struct block *);
237 #ifdef BDEBUG
238 static void opt_dump(compiler_state_t *, struct icode *);
239 #endif
240 
241 #ifndef MAX
242 #define MAX(a,b) ((a)>(b)?(a):(b))
243 #endif
244 
245 static void
246 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
247 {
248 	int level;
249 
250 	if (isMarked(ic, b))
251 		return;
252 
253 	Mark(ic, b);
254 	b->link = 0;
255 
256 	if (JT(b)) {
257 		find_levels_r(opt_state, ic, JT(b));
258 		find_levels_r(opt_state, ic, JF(b));
259 		level = MAX(JT(b)->level, JF(b)->level) + 1;
260 	} else
261 		level = 0;
262 	b->level = level;
263 	b->link = opt_state->levels[level];
264 	opt_state->levels[level] = b;
265 }
266 
267 /*
268  * Level graph.  The levels go from 0 at the leaves to
269  * N_LEVELS at the root.  The opt_state->levels[] array points to the
270  * first node of the level list, whose elements are linked
271  * with the 'link' field of the struct block.
272  */
273 static void
274 find_levels(opt_state_t *opt_state, struct icode *ic)
275 {
276 	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
277 	unMarkAll(ic);
278 	find_levels_r(opt_state, ic, ic->root);
279 }
280 
281 /*
282  * Find dominator relationships.
283  * Assumes graph has been leveled.
284  */
285 static void
286 find_dom(opt_state_t *opt_state, struct block *root)
287 {
288 	int i;
289 	struct block *b;
290 	bpf_u_int32 *x;
291 
292 	/*
293 	 * Initialize sets to contain all nodes.
294 	 */
295 	x = opt_state->all_dom_sets;
296 	i = opt_state->n_blocks * opt_state->nodewords;
297 	while (--i >= 0)
298 		*x++ = ~0;
299 	/* Root starts off empty. */
300 	for (i = opt_state->nodewords; --i >= 0;)
301 		root->dom[i] = 0;
302 
303 	/* root->level is the highest level no found. */
304 	for (i = root->level; i >= 0; --i) {
305 		for (b = opt_state->levels[i]; b; b = b->link) {
306 			SET_INSERT(b->dom, b->id);
307 			if (JT(b) == 0)
308 				continue;
309 			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
310 			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
311 		}
312 	}
313 }
314 
315 static void
316 propedom(opt_state_t *opt_state, struct edge *ep)
317 {
318 	SET_INSERT(ep->edom, ep->id);
319 	if (ep->succ) {
320 		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
321 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
322 	}
323 }
324 
325 /*
326  * Compute edge dominators.
327  * Assumes graph has been leveled and predecessors established.
328  */
329 static void
330 find_edom(opt_state_t *opt_state, struct block *root)
331 {
332 	int i;
333 	uset x;
334 	struct block *b;
335 
336 	x = opt_state->all_edge_sets;
337 	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
338 		x[i] = ~0;
339 
340 	/* root->level is the highest level no found. */
341 	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
342 	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
343 	for (i = root->level; i >= 0; --i) {
344 		for (b = opt_state->levels[i]; b != 0; b = b->link) {
345 			propedom(opt_state, &b->et);
346 			propedom(opt_state, &b->ef);
347 		}
348 	}
349 }
350 
351 /*
352  * Find the backwards transitive closure of the flow graph.  These sets
353  * are backwards in the sense that we find the set of nodes that reach
354  * a given node, not the set of nodes that can be reached by a node.
355  *
356  * Assumes graph has been leveled.
357  */
358 static void
359 find_closure(opt_state_t *opt_state, struct block *root)
360 {
361 	int i;
362 	struct block *b;
363 
364 	/*
365 	 * Initialize sets to contain no nodes.
366 	 */
367 	memset((char *)opt_state->all_closure_sets, 0,
368 	      opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
369 
370 	/* root->level is the highest level no found. */
371 	for (i = root->level; i >= 0; --i) {
372 		for (b = opt_state->levels[i]; b; b = b->link) {
373 			SET_INSERT(b->closure, b->id);
374 			if (JT(b) == 0)
375 				continue;
376 			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
377 			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
378 		}
379 	}
380 }
381 
382 /*
383  * Return the register number that is used by s.  If A and X are both
384  * used, return AX_ATOM.  If no register is used, return -1.
385  *
386  * The implementation should probably change to an array access.
387  */
388 static int
389 atomuse(struct stmt *s)
390 {
391 	register int c = s->code;
392 
393 	if (c == NOP)
394 		return -1;
395 
396 	switch (BPF_CLASS(c)) {
397 
398 	case BPF_RET:
399 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
400 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
401 
402 	case BPF_LD:
403 	case BPF_LDX:
404 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
405 			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
406 
407 	case BPF_ST:
408 		return A_ATOM;
409 
410 	case BPF_STX:
411 		return X_ATOM;
412 
413 	case BPF_JMP:
414 	case BPF_ALU:
415 		if (BPF_SRC(c) == BPF_X)
416 			return AX_ATOM;
417 		return A_ATOM;
418 
419 	case BPF_MISC:
420 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
421 	}
422 	abort();
423 	/* NOTREACHED */
424 }
425 
426 /*
427  * Return the register number that is defined by 's'.  We assume that
428  * a single stmt cannot define more than one register.  If no register
429  * is defined, return -1.
430  *
431  * The implementation should probably change to an array access.
432  */
433 static int
434 atomdef(struct stmt *s)
435 {
436 	if (s->code == NOP)
437 		return -1;
438 
439 	switch (BPF_CLASS(s->code)) {
440 
441 	case BPF_LD:
442 	case BPF_ALU:
443 		return A_ATOM;
444 
445 	case BPF_LDX:
446 		return X_ATOM;
447 
448 	case BPF_ST:
449 	case BPF_STX:
450 		return s->k;
451 
452 	case BPF_MISC:
453 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
454 	}
455 	return -1;
456 }
457 
458 /*
459  * Compute the sets of registers used, defined, and killed by 'b'.
460  *
461  * "Used" means that a statement in 'b' uses the register before any
462  * statement in 'b' defines it, i.e. it uses the value left in
463  * that register by a predecessor block of this block.
464  * "Defined" means that a statement in 'b' defines it.
465  * "Killed" means that a statement in 'b' defines it before any
466  * statement in 'b' uses it, i.e. it kills the value left in that
467  * register by a predecessor block of this block.
468  */
469 static void
470 compute_local_ud(struct block *b)
471 {
472 	struct slist *s;
473 	atomset def = 0, use = 0, killed = 0;
474 	int atom;
475 
476 	for (s = b->stmts; s; s = s->next) {
477 		if (s->s.code == NOP)
478 			continue;
479 		atom = atomuse(&s->s);
480 		if (atom >= 0) {
481 			if (atom == AX_ATOM) {
482 				if (!ATOMELEM(def, X_ATOM))
483 					use |= ATOMMASK(X_ATOM);
484 				if (!ATOMELEM(def, A_ATOM))
485 					use |= ATOMMASK(A_ATOM);
486 			}
487 			else if (atom < N_ATOMS) {
488 				if (!ATOMELEM(def, atom))
489 					use |= ATOMMASK(atom);
490 			}
491 			else
492 				abort();
493 		}
494 		atom = atomdef(&s->s);
495 		if (atom >= 0) {
496 			if (!ATOMELEM(use, atom))
497 				killed |= ATOMMASK(atom);
498 			def |= ATOMMASK(atom);
499 		}
500 	}
501 	if (BPF_CLASS(b->s.code) == BPF_JMP) {
502 		/*
503 		 * XXX - what about RET?
504 		 */
505 		atom = atomuse(&b->s);
506 		if (atom >= 0) {
507 			if (atom == AX_ATOM) {
508 				if (!ATOMELEM(def, X_ATOM))
509 					use |= ATOMMASK(X_ATOM);
510 				if (!ATOMELEM(def, A_ATOM))
511 					use |= ATOMMASK(A_ATOM);
512 			}
513 			else if (atom < N_ATOMS) {
514 				if (!ATOMELEM(def, atom))
515 					use |= ATOMMASK(atom);
516 			}
517 			else
518 				abort();
519 		}
520 	}
521 
522 	b->def = def;
523 	b->kill = killed;
524 	b->in_use = use;
525 }
526 
527 /*
528  * Assume graph is already leveled.
529  */
530 static void
531 find_ud(opt_state_t *opt_state, struct block *root)
532 {
533 	int i, maxlevel;
534 	struct block *p;
535 
536 	/*
537 	 * root->level is the highest level no found;
538 	 * count down from there.
539 	 */
540 	maxlevel = root->level;
541 	for (i = maxlevel; i >= 0; --i)
542 		for (p = opt_state->levels[i]; p; p = p->link) {
543 			compute_local_ud(p);
544 			p->out_use = 0;
545 		}
546 
547 	for (i = 1; i <= maxlevel; ++i) {
548 		for (p = opt_state->levels[i]; p; p = p->link) {
549 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
550 			p->in_use |= p->out_use &~ p->kill;
551 		}
552 	}
553 }
554 static void
555 init_val(opt_state_t *opt_state)
556 {
557 	opt_state->curval = 0;
558 	opt_state->next_vnode = opt_state->vnode_base;
559 	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
560 	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
561 }
562 
563 /* Because we really don't have an IR, this stuff is a little messy. */
564 static int
565 F(opt_state_t *opt_state, int code, int v0, int v1)
566 {
567 	u_int hash;
568 	int val;
569 	struct valnode *p;
570 
571 	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
572 	hash %= MODULUS;
573 
574 	for (p = opt_state->hashtbl[hash]; p; p = p->next)
575 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
576 			return p->val;
577 
578 	val = ++opt_state->curval;
579 	if (BPF_MODE(code) == BPF_IMM &&
580 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
581 		opt_state->vmap[val].const_val = v0;
582 		opt_state->vmap[val].is_const = 1;
583 	}
584 	p = opt_state->next_vnode++;
585 	p->val = val;
586 	p->code = code;
587 	p->v0 = v0;
588 	p->v1 = v1;
589 	p->next = opt_state->hashtbl[hash];
590 	opt_state->hashtbl[hash] = p;
591 
592 	return val;
593 }
594 
595 static inline void
596 vstore(struct stmt *s, int *valp, int newval, int alter)
597 {
598 	if (alter && *valp == newval)
599 		s->code = NOP;
600 	else
601 		*valp = newval;
602 }
603 
604 /*
605  * Do constant-folding on binary operators.
606  * (Unary operators are handled elsewhere.)
607  */
608 static void
609 fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
610     struct stmt *s, int v0, int v1)
611 {
612 	bpf_u_int32 a, b;
613 
614 	a = opt_state->vmap[v0].const_val;
615 	b = opt_state->vmap[v1].const_val;
616 
617 	switch (BPF_OP(s->code)) {
618 	case BPF_ADD:
619 		a += b;
620 		break;
621 
622 	case BPF_SUB:
623 		a -= b;
624 		break;
625 
626 	case BPF_MUL:
627 		a *= b;
628 		break;
629 
630 	case BPF_DIV:
631 		if (b == 0)
632 			bpf_error(cstate, "division by zero");
633 		a /= b;
634 		break;
635 
636 	case BPF_MOD:
637 		if (b == 0)
638 			bpf_error(cstate, "modulus by zero");
639 		a %= b;
640 		break;
641 
642 	case BPF_AND:
643 		a &= b;
644 		break;
645 
646 	case BPF_OR:
647 		a |= b;
648 		break;
649 
650 	case BPF_XOR:
651 		a ^= b;
652 		break;
653 
654 	case BPF_LSH:
655 		a <<= b;
656 		break;
657 
658 	case BPF_RSH:
659 		a >>= b;
660 		break;
661 
662 	default:
663 		abort();
664 	}
665 	s->k = a;
666 	s->code = BPF_LD|BPF_IMM;
667 	opt_state->done = 0;
668 }
669 
670 static inline struct slist *
671 this_op(struct slist *s)
672 {
673 	while (s != 0 && s->s.code == NOP)
674 		s = s->next;
675 	return s;
676 }
677 
678 static void
679 opt_not(struct block *b)
680 {
681 	struct block *tmp = JT(b);
682 
683 	JT(b) = JF(b);
684 	JF(b) = tmp;
685 }
686 
687 static void
688 opt_peep(opt_state_t *opt_state, struct block *b)
689 {
690 	struct slist *s;
691 	struct slist *next, *last;
692 	int val;
693 
694 	s = b->stmts;
695 	if (s == 0)
696 		return;
697 
698 	last = s;
699 	for (/*empty*/; /*empty*/; s = next) {
700 		/*
701 		 * Skip over nops.
702 		 */
703 		s = this_op(s);
704 		if (s == 0)
705 			break;	/* nothing left in the block */
706 
707 		/*
708 		 * Find the next real instruction after that one
709 		 * (skipping nops).
710 		 */
711 		next = this_op(s->next);
712 		if (next == 0)
713 			break;	/* no next instruction */
714 		last = next;
715 
716 		/*
717 		 * st  M[k]	-->	st  M[k]
718 		 * ldx M[k]		tax
719 		 */
720 		if (s->s.code == BPF_ST &&
721 		    next->s.code == (BPF_LDX|BPF_MEM) &&
722 		    s->s.k == next->s.k) {
723 			opt_state->done = 0;
724 			next->s.code = BPF_MISC|BPF_TAX;
725 		}
726 		/*
727 		 * ld  #k	-->	ldx  #k
728 		 * tax			txa
729 		 */
730 		if (s->s.code == (BPF_LD|BPF_IMM) &&
731 		    next->s.code == (BPF_MISC|BPF_TAX)) {
732 			s->s.code = BPF_LDX|BPF_IMM;
733 			next->s.code = BPF_MISC|BPF_TXA;
734 			opt_state->done = 0;
735 		}
736 		/*
737 		 * This is an ugly special case, but it happens
738 		 * when you say tcp[k] or udp[k] where k is a constant.
739 		 */
740 		if (s->s.code == (BPF_LD|BPF_IMM)) {
741 			struct slist *add, *tax, *ild;
742 
743 			/*
744 			 * Check that X isn't used on exit from this
745 			 * block (which the optimizer might cause).
746 			 * We know the code generator won't generate
747 			 * any local dependencies.
748 			 */
749 			if (ATOMELEM(b->out_use, X_ATOM))
750 				continue;
751 
752 			/*
753 			 * Check that the instruction following the ldi
754 			 * is an addx, or it's an ldxms with an addx
755 			 * following it (with 0 or more nops between the
756 			 * ldxms and addx).
757 			 */
758 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
759 				add = next;
760 			else
761 				add = this_op(next->next);
762 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
763 				continue;
764 
765 			/*
766 			 * Check that a tax follows that (with 0 or more
767 			 * nops between them).
768 			 */
769 			tax = this_op(add->next);
770 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
771 				continue;
772 
773 			/*
774 			 * Check that an ild follows that (with 0 or more
775 			 * nops between them).
776 			 */
777 			ild = this_op(tax->next);
778 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
779 			    BPF_MODE(ild->s.code) != BPF_IND)
780 				continue;
781 			/*
782 			 * We want to turn this sequence:
783 			 *
784 			 * (004) ldi     #0x2		{s}
785 			 * (005) ldxms   [14]		{next}  -- optional
786 			 * (006) addx			{add}
787 			 * (007) tax			{tax}
788 			 * (008) ild     [x+0]		{ild}
789 			 *
790 			 * into this sequence:
791 			 *
792 			 * (004) nop
793 			 * (005) ldxms   [14]
794 			 * (006) nop
795 			 * (007) nop
796 			 * (008) ild     [x+2]
797 			 *
798 			 * XXX We need to check that X is not
799 			 * subsequently used, because we want to change
800 			 * what'll be in it after this sequence.
801 			 *
802 			 * We know we can eliminate the accumulator
803 			 * modifications earlier in the sequence since
804 			 * it is defined by the last stmt of this sequence
805 			 * (i.e., the last statement of the sequence loads
806 			 * a value into the accumulator, so we can eliminate
807 			 * earlier operations on the accumulator).
808 			 */
809 			ild->s.k += s->s.k;
810 			s->s.code = NOP;
811 			add->s.code = NOP;
812 			tax->s.code = NOP;
813 			opt_state->done = 0;
814 		}
815 	}
816 	/*
817 	 * If the comparison at the end of a block is an equality
818 	 * comparison against a constant, and nobody uses the value
819 	 * we leave in the A register at the end of a block, and
820 	 * the operation preceding the comparison is an arithmetic
821 	 * operation, we can sometime optimize it away.
822 	 */
823 	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
824 	    !ATOMELEM(b->out_use, A_ATOM)) {
825 	    	/*
826 	    	 * We can optimize away certain subtractions of the
827 	    	 * X register.
828 	    	 */
829 		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
830 			val = b->val[X_ATOM];
831 			if (opt_state->vmap[val].is_const) {
832 				/*
833 				 * If we have a subtract to do a comparison,
834 				 * and the X register is a known constant,
835 				 * we can merge this value into the
836 				 * comparison:
837 				 *
838 				 * sub x  ->	nop
839 				 * jeq #y	jeq #(x+y)
840 				 */
841 				b->s.k += opt_state->vmap[val].const_val;
842 				last->s.code = NOP;
843 				opt_state->done = 0;
844 			} else if (b->s.k == 0) {
845 				/*
846 				 * If the X register isn't a constant,
847 				 * and the comparison in the test is
848 				 * against 0, we can compare with the
849 				 * X register, instead:
850 				 *
851 				 * sub x  ->	nop
852 				 * jeq #0	jeq x
853 				 */
854 				last->s.code = NOP;
855 				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
856 				opt_state->done = 0;
857 			}
858 		}
859 		/*
860 		 * Likewise, a constant subtract can be simplified:
861 		 *
862 		 * sub #x ->	nop
863 		 * jeq #y ->	jeq #(x+y)
864 		 */
865 		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
866 			last->s.code = NOP;
867 			b->s.k += last->s.k;
868 			opt_state->done = 0;
869 		}
870 		/*
871 		 * And, similarly, a constant AND can be simplified
872 		 * if we're testing against 0, i.e.:
873 		 *
874 		 * and #k	nop
875 		 * jeq #0  ->	jset #k
876 		 */
877 		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
878 		    b->s.k == 0) {
879 			b->s.k = last->s.k;
880 			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
881 			last->s.code = NOP;
882 			opt_state->done = 0;
883 			opt_not(b);
884 		}
885 	}
886 	/*
887 	 * jset #0        ->   never
888 	 * jset #ffffffff ->   always
889 	 */
890 	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
891 		if (b->s.k == 0)
892 			JT(b) = JF(b);
893 		if ((u_int)b->s.k == 0xffffffffU)
894 			JF(b) = JT(b);
895 	}
896 	/*
897 	 * If we're comparing against the index register, and the index
898 	 * register is a known constant, we can just compare against that
899 	 * constant.
900 	 */
901 	val = b->val[X_ATOM];
902 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
903 		bpf_int32 v = opt_state->vmap[val].const_val;
904 		b->s.code &= ~BPF_X;
905 		b->s.k = v;
906 	}
907 	/*
908 	 * If the accumulator is a known constant, we can compute the
909 	 * comparison result.
910 	 */
911 	val = b->val[A_ATOM];
912 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
913 		bpf_int32 v = opt_state->vmap[val].const_val;
914 		switch (BPF_OP(b->s.code)) {
915 
916 		case BPF_JEQ:
917 			v = v == b->s.k;
918 			break;
919 
920 		case BPF_JGT:
921 			v = (unsigned)v > (unsigned)b->s.k;
922 			break;
923 
924 		case BPF_JGE:
925 			v = (unsigned)v >= (unsigned)b->s.k;
926 			break;
927 
928 		case BPF_JSET:
929 			v &= b->s.k;
930 			break;
931 
932 		default:
933 			abort();
934 		}
935 		if (JF(b) != JT(b))
936 			opt_state->done = 0;
937 		if (v)
938 			JF(b) = JT(b);
939 		else
940 			JT(b) = JF(b);
941 	}
942 }
943 
944 /*
945  * Compute the symbolic value of expression of 's', and update
946  * anything it defines in the value table 'val'.  If 'alter' is true,
947  * do various optimizations.  This code would be cleaner if symbolic
948  * evaluation and code transformations weren't folded together.
949  */
950 static void
951 opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
952     struct stmt *s, int val[], int alter)
953 {
954 	int op;
955 	int v;
956 
957 	switch (s->code) {
958 
959 	case BPF_LD|BPF_ABS|BPF_W:
960 	case BPF_LD|BPF_ABS|BPF_H:
961 	case BPF_LD|BPF_ABS|BPF_B:
962 		v = F(opt_state, s->code, s->k, 0L);
963 		vstore(s, &val[A_ATOM], v, alter);
964 		break;
965 
966 	case BPF_LD|BPF_IND|BPF_W:
967 	case BPF_LD|BPF_IND|BPF_H:
968 	case BPF_LD|BPF_IND|BPF_B:
969 		v = val[X_ATOM];
970 		if (alter && opt_state->vmap[v].is_const) {
971 			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
972 			s->k += opt_state->vmap[v].const_val;
973 			v = F(opt_state, s->code, s->k, 0L);
974 			opt_state->done = 0;
975 		}
976 		else
977 			v = F(opt_state, s->code, s->k, v);
978 		vstore(s, &val[A_ATOM], v, alter);
979 		break;
980 
981 	case BPF_LD|BPF_LEN:
982 		v = F(opt_state, s->code, 0L, 0L);
983 		vstore(s, &val[A_ATOM], v, alter);
984 		break;
985 
986 	case BPF_LD|BPF_IMM:
987 		v = K(s->k);
988 		vstore(s, &val[A_ATOM], v, alter);
989 		break;
990 
991 	case BPF_LDX|BPF_IMM:
992 		v = K(s->k);
993 		vstore(s, &val[X_ATOM], v, alter);
994 		break;
995 
996 	case BPF_LDX|BPF_MSH|BPF_B:
997 		v = F(opt_state, s->code, s->k, 0L);
998 		vstore(s, &val[X_ATOM], v, alter);
999 		break;
1000 
1001 	case BPF_ALU|BPF_NEG:
1002 		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1003 			s->code = BPF_LD|BPF_IMM;
1004 			s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1005 			val[A_ATOM] = K(s->k);
1006 		}
1007 		else
1008 			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1009 		break;
1010 
1011 	case BPF_ALU|BPF_ADD|BPF_K:
1012 	case BPF_ALU|BPF_SUB|BPF_K:
1013 	case BPF_ALU|BPF_MUL|BPF_K:
1014 	case BPF_ALU|BPF_DIV|BPF_K:
1015 	case BPF_ALU|BPF_MOD|BPF_K:
1016 	case BPF_ALU|BPF_AND|BPF_K:
1017 	case BPF_ALU|BPF_OR|BPF_K:
1018 	case BPF_ALU|BPF_XOR|BPF_K:
1019 	case BPF_ALU|BPF_LSH|BPF_K:
1020 	case BPF_ALU|BPF_RSH|BPF_K:
1021 		op = BPF_OP(s->code);
1022 		if (alter) {
1023 			if (s->k == 0) {
1024 				/* don't optimize away "sub #0"
1025 				 * as it may be needed later to
1026 				 * fixup the generated math code */
1027 				if (op == BPF_ADD ||
1028 				    op == BPF_LSH || op == BPF_RSH ||
1029 				    op == BPF_OR || op == BPF_XOR) {
1030 					s->code = NOP;
1031 					break;
1032 				}
1033 				if (op == BPF_MUL || op == BPF_AND) {
1034 					s->code = BPF_LD|BPF_IMM;
1035 					val[A_ATOM] = K(s->k);
1036 					break;
1037 				}
1038 			}
1039 			if (opt_state->vmap[val[A_ATOM]].is_const) {
1040 				fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
1041 				val[A_ATOM] = K(s->k);
1042 				break;
1043 			}
1044 		}
1045 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1046 		break;
1047 
1048 	case BPF_ALU|BPF_ADD|BPF_X:
1049 	case BPF_ALU|BPF_SUB|BPF_X:
1050 	case BPF_ALU|BPF_MUL|BPF_X:
1051 	case BPF_ALU|BPF_DIV|BPF_X:
1052 	case BPF_ALU|BPF_MOD|BPF_X:
1053 	case BPF_ALU|BPF_AND|BPF_X:
1054 	case BPF_ALU|BPF_OR|BPF_X:
1055 	case BPF_ALU|BPF_XOR|BPF_X:
1056 	case BPF_ALU|BPF_LSH|BPF_X:
1057 	case BPF_ALU|BPF_RSH|BPF_X:
1058 		op = BPF_OP(s->code);
1059 		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1060 			if (opt_state->vmap[val[A_ATOM]].is_const) {
1061 				fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
1062 				val[A_ATOM] = K(s->k);
1063 			}
1064 			else {
1065 				s->code = BPF_ALU|BPF_K|op;
1066 				s->k = opt_state->vmap[val[X_ATOM]].const_val;
1067 				opt_state->done = 0;
1068 				val[A_ATOM] =
1069 					F(opt_state, s->code, val[A_ATOM], K(s->k));
1070 			}
1071 			break;
1072 		}
1073 		/*
1074 		 * Check if we're doing something to an accumulator
1075 		 * that is 0, and simplify.  This may not seem like
1076 		 * much of a simplification but it could open up further
1077 		 * optimizations.
1078 		 * XXX We could also check for mul by 1, etc.
1079 		 */
1080 		if (alter && opt_state->vmap[val[A_ATOM]].is_const
1081 		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1082 			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1083 				s->code = BPF_MISC|BPF_TXA;
1084 				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1085 				break;
1086 			}
1087 			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1088 				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1089 				s->code = BPF_LD|BPF_IMM;
1090 				s->k = 0;
1091 				vstore(s, &val[A_ATOM], K(s->k), alter);
1092 				break;
1093 			}
1094 			else if (op == BPF_NEG) {
1095 				s->code = NOP;
1096 				break;
1097 			}
1098 		}
1099 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1100 		break;
1101 
1102 	case BPF_MISC|BPF_TXA:
1103 		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1104 		break;
1105 
1106 	case BPF_LD|BPF_MEM:
1107 		v = val[s->k];
1108 		if (alter && opt_state->vmap[v].is_const) {
1109 			s->code = BPF_LD|BPF_IMM;
1110 			s->k = opt_state->vmap[v].const_val;
1111 			opt_state->done = 0;
1112 		}
1113 		vstore(s, &val[A_ATOM], v, alter);
1114 		break;
1115 
1116 	case BPF_MISC|BPF_TAX:
1117 		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1118 		break;
1119 
1120 	case BPF_LDX|BPF_MEM:
1121 		v = val[s->k];
1122 		if (alter && opt_state->vmap[v].is_const) {
1123 			s->code = BPF_LDX|BPF_IMM;
1124 			s->k = opt_state->vmap[v].const_val;
1125 			opt_state->done = 0;
1126 		}
1127 		vstore(s, &val[X_ATOM], v, alter);
1128 		break;
1129 
1130 	case BPF_ST:
1131 		vstore(s, &val[s->k], val[A_ATOM], alter);
1132 		break;
1133 
1134 	case BPF_STX:
1135 		vstore(s, &val[s->k], val[X_ATOM], alter);
1136 		break;
1137 	}
1138 }
1139 
1140 static void
1141 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1142 {
1143 	register int atom;
1144 
1145 	atom = atomuse(s);
1146 	if (atom >= 0) {
1147 		if (atom == AX_ATOM) {
1148 			last[X_ATOM] = 0;
1149 			last[A_ATOM] = 0;
1150 		}
1151 		else
1152 			last[atom] = 0;
1153 	}
1154 	atom = atomdef(s);
1155 	if (atom >= 0) {
1156 		if (last[atom]) {
1157 			opt_state->done = 0;
1158 			last[atom]->code = NOP;
1159 		}
1160 		last[atom] = s;
1161 	}
1162 }
1163 
1164 static void
1165 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1166 {
1167 	register struct slist *s;
1168 	register int atom;
1169 	struct stmt *last[N_ATOMS];
1170 
1171 	memset((char *)last, 0, sizeof last);
1172 
1173 	for (s = b->stmts; s != 0; s = s->next)
1174 		deadstmt(opt_state, &s->s, last);
1175 	deadstmt(opt_state, &b->s, last);
1176 
1177 	for (atom = 0; atom < N_ATOMS; ++atom)
1178 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1179 			last[atom]->code = NOP;
1180 			opt_state->done = 0;
1181 		}
1182 }
1183 
1184 static void
1185 opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
1186     struct block *b, int do_stmts)
1187 {
1188 	struct slist *s;
1189 	struct edge *p;
1190 	int i;
1191 	bpf_int32 aval, xval;
1192 
1193 #if 0
1194 	for (s = b->stmts; s && s->next; s = s->next)
1195 		if (BPF_CLASS(s->s.code) == BPF_JMP) {
1196 			do_stmts = 0;
1197 			break;
1198 		}
1199 #endif
1200 
1201 	/*
1202 	 * Initialize the atom values.
1203 	 */
1204 	p = b->in_edges;
1205 	if (p == 0) {
1206 		/*
1207 		 * We have no predecessors, so everything is undefined
1208 		 * upon entry to this block.
1209 		 */
1210 		memset((char *)b->val, 0, sizeof(b->val));
1211 	} else {
1212 		/*
1213 		 * Inherit values from our predecessors.
1214 		 *
1215 		 * First, get the values from the predecessor along the
1216 		 * first edge leading to this node.
1217 		 */
1218 		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1219 		/*
1220 		 * Now look at all the other nodes leading to this node.
1221 		 * If, for the predecessor along that edge, a register
1222 		 * has a different value from the one we have (i.e.,
1223 		 * control paths are merging, and the merging paths
1224 		 * assign different values to that register), give the
1225 		 * register the undefined value of 0.
1226 		 */
1227 		while ((p = p->next) != NULL) {
1228 			for (i = 0; i < N_ATOMS; ++i)
1229 				if (b->val[i] != p->pred->val[i])
1230 					b->val[i] = 0;
1231 		}
1232 	}
1233 	aval = b->val[A_ATOM];
1234 	xval = b->val[X_ATOM];
1235 	for (s = b->stmts; s; s = s->next)
1236 		opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);
1237 
1238 	/*
1239 	 * This is a special case: if we don't use anything from this
1240 	 * block, and we load the accumulator or index register with a
1241 	 * value that is already there, or if this block is a return,
1242 	 * eliminate all the statements.
1243 	 *
1244 	 * XXX - what if it does a store?
1245 	 *
1246 	 * XXX - why does it matter whether we use anything from this
1247 	 * block?  If the accumulator or index register doesn't change
1248 	 * its value, isn't that OK even if we use that value?
1249 	 *
1250 	 * XXX - if we load the accumulator with a different value,
1251 	 * and the block ends with a conditional branch, we obviously
1252 	 * can't eliminate it, as the branch depends on that value.
1253 	 * For the index register, the conditional branch only depends
1254 	 * on the index register value if the test is against the index
1255 	 * register value rather than a constant; if nothing uses the
1256 	 * value we put into the index register, and we're not testing
1257 	 * against the index register's value, and there aren't any
1258 	 * other problems that would keep us from eliminating this
1259 	 * block, can we eliminate it?
1260 	 */
1261 	if (do_stmts &&
1262 	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1263 	      xval != 0 && b->val[X_ATOM] == xval) ||
1264 	     BPF_CLASS(b->s.code) == BPF_RET)) {
1265 		if (b->stmts != 0) {
1266 			b->stmts = 0;
1267 			opt_state->done = 0;
1268 		}
1269 	} else {
1270 		opt_peep(opt_state, b);
1271 		opt_deadstores(opt_state, b);
1272 	}
1273 	/*
1274 	 * Set up values for branch optimizer.
1275 	 */
1276 	if (BPF_SRC(b->s.code) == BPF_K)
1277 		b->oval = K(b->s.k);
1278 	else
1279 		b->oval = b->val[X_ATOM];
1280 	b->et.code = b->s.code;
1281 	b->ef.code = -b->s.code;
1282 }
1283 
1284 /*
1285  * Return true if any register that is used on exit from 'succ', has
1286  * an exit value that is different from the corresponding exit value
1287  * from 'b'.
1288  */
1289 static int
1290 use_conflict(struct block *b, struct block *succ)
1291 {
1292 	int atom;
1293 	atomset use = succ->out_use;
1294 
1295 	if (use == 0)
1296 		return 0;
1297 
1298 	for (atom = 0; atom < N_ATOMS; ++atom)
1299 		if (ATOMELEM(use, atom))
1300 			if (b->val[atom] != succ->val[atom])
1301 				return 1;
1302 	return 0;
1303 }
1304 
1305 static struct block *
1306 fold_edge(struct block *child, struct edge *ep)
1307 {
1308 	int sense;
1309 	int aval0, aval1, oval0, oval1;
1310 	int code = ep->code;
1311 
1312 	if (code < 0) {
1313 		code = -code;
1314 		sense = 0;
1315 	} else
1316 		sense = 1;
1317 
1318 	if (child->s.code != code)
1319 		return 0;
1320 
1321 	aval0 = child->val[A_ATOM];
1322 	oval0 = child->oval;
1323 	aval1 = ep->pred->val[A_ATOM];
1324 	oval1 = ep->pred->oval;
1325 
1326 	if (aval0 != aval1)
1327 		return 0;
1328 
1329 	if (oval0 == oval1)
1330 		/*
1331 		 * The operands of the branch instructions are
1332 		 * identical, so the result is true if a true
1333 		 * branch was taken to get here, otherwise false.
1334 		 */
1335 		return sense ? JT(child) : JF(child);
1336 
1337 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1338 		/*
1339 		 * At this point, we only know the comparison if we
1340 		 * came down the true branch, and it was an equality
1341 		 * comparison with a constant.
1342 		 *
1343 		 * I.e., if we came down the true branch, and the branch
1344 		 * was an equality comparison with a constant, we know the
1345 		 * accumulator contains that constant.  If we came down
1346 		 * the false branch, or the comparison wasn't with a
1347 		 * constant, we don't know what was in the accumulator.
1348 		 *
1349 		 * We rely on the fact that distinct constants have distinct
1350 		 * value numbers.
1351 		 */
1352 		return JF(child);
1353 
1354 	return 0;
1355 }
1356 
1357 static void
1358 opt_j(opt_state_t *opt_state, struct edge *ep)
1359 {
1360 	register int i, k;
1361 	register struct block *target;
1362 
1363 	if (JT(ep->succ) == 0)
1364 		return;
1365 
1366 	if (JT(ep->succ) == JF(ep->succ)) {
1367 		/*
1368 		 * Common branch targets can be eliminated, provided
1369 		 * there is no data dependency.
1370 		 */
1371 		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1372 			opt_state->done = 0;
1373 			ep->succ = JT(ep->succ);
1374 		}
1375 	}
1376 	/*
1377 	 * For each edge dominator that matches the successor of this
1378 	 * edge, promote the edge successor to the its grandchild.
1379 	 *
1380 	 * XXX We violate the set abstraction here in favor a reasonably
1381 	 * efficient loop.
1382 	 */
1383  top:
1384 	for (i = 0; i < opt_state->edgewords; ++i) {
1385 		register bpf_u_int32 x = ep->edom[i];
1386 
1387 		while (x != 0) {
1388 			k = ffs(x) - 1;
1389 			x &=~ (1 << k);
1390 			k += i * BITS_PER_WORD;
1391 
1392 			target = fold_edge(ep->succ, opt_state->edges[k]);
1393 			/*
1394 			 * Check that there is no data dependency between
1395 			 * nodes that will be violated if we move the edge.
1396 			 */
1397 			if (target != 0 && !use_conflict(ep->pred, target)) {
1398 				opt_state->done = 0;
1399 				ep->succ = target;
1400 				if (JT(target) != 0)
1401 					/*
1402 					 * Start over unless we hit a leaf.
1403 					 */
1404 					goto top;
1405 				return;
1406 			}
1407 		}
1408 	}
1409 }
1410 
1411 
1412 static void
1413 or_pullup(opt_state_t *opt_state, struct block *b)
1414 {
1415 	int val, at_top;
1416 	struct block *pull;
1417 	struct block **diffp, **samep;
1418 	struct edge *ep;
1419 
1420 	ep = b->in_edges;
1421 	if (ep == 0)
1422 		return;
1423 
1424 	/*
1425 	 * Make sure each predecessor loads the same value.
1426 	 * XXX why?
1427 	 */
1428 	val = ep->pred->val[A_ATOM];
1429 	for (ep = ep->next; ep != 0; ep = ep->next)
1430 		if (val != ep->pred->val[A_ATOM])
1431 			return;
1432 
1433 	if (JT(b->in_edges->pred) == b)
1434 		diffp = &JT(b->in_edges->pred);
1435 	else
1436 		diffp = &JF(b->in_edges->pred);
1437 
1438 	at_top = 1;
1439 	while (1) {
1440 		if (*diffp == 0)
1441 			return;
1442 
1443 		if (JT(*diffp) != JT(b))
1444 			return;
1445 
1446 		if (!SET_MEMBER((*diffp)->dom, b->id))
1447 			return;
1448 
1449 		if ((*diffp)->val[A_ATOM] != val)
1450 			break;
1451 
1452 		diffp = &JF(*diffp);
1453 		at_top = 0;
1454 	}
1455 	samep = &JF(*diffp);
1456 	while (1) {
1457 		if (*samep == 0)
1458 			return;
1459 
1460 		if (JT(*samep) != JT(b))
1461 			return;
1462 
1463 		if (!SET_MEMBER((*samep)->dom, b->id))
1464 			return;
1465 
1466 		if ((*samep)->val[A_ATOM] == val)
1467 			break;
1468 
1469 		/* XXX Need to check that there are no data dependencies
1470 		   between dp0 and dp1.  Currently, the code generator
1471 		   will not produce such dependencies. */
1472 		samep = &JF(*samep);
1473 	}
1474 #ifdef notdef
1475 	/* XXX This doesn't cover everything. */
1476 	for (i = 0; i < N_ATOMS; ++i)
1477 		if ((*samep)->val[i] != pred->val[i])
1478 			return;
1479 #endif
1480 	/* Pull up the node. */
1481 	pull = *samep;
1482 	*samep = JF(pull);
1483 	JF(pull) = *diffp;
1484 
1485 	/*
1486 	 * At the top of the chain, each predecessor needs to point at the
1487 	 * pulled up node.  Inside the chain, there is only one predecessor
1488 	 * to worry about.
1489 	 */
1490 	if (at_top) {
1491 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1492 			if (JT(ep->pred) == b)
1493 				JT(ep->pred) = pull;
1494 			else
1495 				JF(ep->pred) = pull;
1496 		}
1497 	}
1498 	else
1499 		*diffp = pull;
1500 
1501 	opt_state->done = 0;
1502 }
1503 
1504 static void
1505 and_pullup(opt_state_t *opt_state, struct block *b)
1506 {
1507 	int val, at_top;
1508 	struct block *pull;
1509 	struct block **diffp, **samep;
1510 	struct edge *ep;
1511 
1512 	ep = b->in_edges;
1513 	if (ep == 0)
1514 		return;
1515 
1516 	/*
1517 	 * Make sure each predecessor loads the same value.
1518 	 */
1519 	val = ep->pred->val[A_ATOM];
1520 	for (ep = ep->next; ep != 0; ep = ep->next)
1521 		if (val != ep->pred->val[A_ATOM])
1522 			return;
1523 
1524 	if (JT(b->in_edges->pred) == b)
1525 		diffp = &JT(b->in_edges->pred);
1526 	else
1527 		diffp = &JF(b->in_edges->pred);
1528 
1529 	at_top = 1;
1530 	while (1) {
1531 		if (*diffp == 0)
1532 			return;
1533 
1534 		if (JF(*diffp) != JF(b))
1535 			return;
1536 
1537 		if (!SET_MEMBER((*diffp)->dom, b->id))
1538 			return;
1539 
1540 		if ((*diffp)->val[A_ATOM] != val)
1541 			break;
1542 
1543 		diffp = &JT(*diffp);
1544 		at_top = 0;
1545 	}
1546 	samep = &JT(*diffp);
1547 	while (1) {
1548 		if (*samep == 0)
1549 			return;
1550 
1551 		if (JF(*samep) != JF(b))
1552 			return;
1553 
1554 		if (!SET_MEMBER((*samep)->dom, b->id))
1555 			return;
1556 
1557 		if ((*samep)->val[A_ATOM] == val)
1558 			break;
1559 
1560 		/* XXX Need to check that there are no data dependencies
1561 		   between diffp and samep.  Currently, the code generator
1562 		   will not produce such dependencies. */
1563 		samep = &JT(*samep);
1564 	}
1565 #ifdef notdef
1566 	/* XXX This doesn't cover everything. */
1567 	for (i = 0; i < N_ATOMS; ++i)
1568 		if ((*samep)->val[i] != pred->val[i])
1569 			return;
1570 #endif
1571 	/* Pull up the node. */
1572 	pull = *samep;
1573 	*samep = JT(pull);
1574 	JT(pull) = *diffp;
1575 
1576 	/*
1577 	 * At the top of the chain, each predecessor needs to point at the
1578 	 * pulled up node.  Inside the chain, there is only one predecessor
1579 	 * to worry about.
1580 	 */
1581 	if (at_top) {
1582 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1583 			if (JT(ep->pred) == b)
1584 				JT(ep->pred) = pull;
1585 			else
1586 				JF(ep->pred) = pull;
1587 		}
1588 	}
1589 	else
1590 		*diffp = pull;
1591 
1592 	opt_state->done = 0;
1593 }
1594 
1595 static void
1596 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1597     int do_stmts)
1598 {
1599 	int i, maxlevel;
1600 	struct block *p;
1601 
1602 	init_val(opt_state);
1603 	maxlevel = ic->root->level;
1604 
1605 	find_inedges(opt_state, ic->root);
1606 	for (i = maxlevel; i >= 0; --i)
1607 		for (p = opt_state->levels[i]; p; p = p->link)
1608 			opt_blk(cstate, ic, opt_state, p, do_stmts);
1609 
1610 	if (do_stmts)
1611 		/*
1612 		 * No point trying to move branches; it can't possibly
1613 		 * make a difference at this point.
1614 		 */
1615 		return;
1616 
1617 	for (i = 1; i <= maxlevel; ++i) {
1618 		for (p = opt_state->levels[i]; p; p = p->link) {
1619 			opt_j(opt_state, &p->et);
1620 			opt_j(opt_state, &p->ef);
1621 		}
1622 	}
1623 
1624 	find_inedges(opt_state, ic->root);
1625 	for (i = 1; i <= maxlevel; ++i) {
1626 		for (p = opt_state->levels[i]; p; p = p->link) {
1627 			or_pullup(opt_state, p);
1628 			and_pullup(opt_state, p);
1629 		}
1630 	}
1631 }
1632 
1633 static inline void
1634 link_inedge(struct edge *parent, struct block *child)
1635 {
1636 	parent->next = child->in_edges;
1637 	child->in_edges = parent;
1638 }
1639 
1640 static void
1641 find_inedges(opt_state_t *opt_state, struct block *root)
1642 {
1643 	int i;
1644 	struct block *b;
1645 
1646 	for (i = 0; i < opt_state->n_blocks; ++i)
1647 		opt_state->blocks[i]->in_edges = 0;
1648 
1649 	/*
1650 	 * Traverse the graph, adding each edge to the predecessor
1651 	 * list of its successors.  Skip the leaves (i.e. level 0).
1652 	 */
1653 	for (i = root->level; i > 0; --i) {
1654 		for (b = opt_state->levels[i]; b != 0; b = b->link) {
1655 			link_inedge(&b->et, JT(b));
1656 			link_inedge(&b->ef, JF(b));
1657 		}
1658 	}
1659 }
1660 
1661 static void
1662 opt_root(struct block **b)
1663 {
1664 	struct slist *tmp, *s;
1665 
1666 	s = (*b)->stmts;
1667 	(*b)->stmts = 0;
1668 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1669 		*b = JT(*b);
1670 
1671 	tmp = (*b)->stmts;
1672 	if (tmp != 0)
1673 		sappend(s, tmp);
1674 	(*b)->stmts = s;
1675 
1676 	/*
1677 	 * If the root node is a return, then there is no
1678 	 * point executing any statements (since the bpf machine
1679 	 * has no side effects).
1680 	 */
1681 	if (BPF_CLASS((*b)->s.code) == BPF_RET)
1682 		(*b)->stmts = 0;
1683 }
1684 
1685 static void
1686 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1687     int do_stmts)
1688 {
1689 
1690 #ifdef BDEBUG
1691 	if (pcap_optimizer_debug > 1) {
1692 		printf("opt_loop(root, %d) begin\n", do_stmts);
1693 		opt_dump(cstate, ic);
1694 	}
1695 #endif
1696 	do {
1697 		opt_state->done = 1;
1698 		find_levels(opt_state, ic);
1699 		find_dom(opt_state, ic->root);
1700 		find_closure(opt_state, ic->root);
1701 		find_ud(opt_state, ic->root);
1702 		find_edom(opt_state, ic->root);
1703 		opt_blks(cstate, opt_state, ic, do_stmts);
1704 #ifdef BDEBUG
1705 		if (pcap_optimizer_debug > 1) {
1706 			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1707 			opt_dump(cstate, ic);
1708 		}
1709 #endif
1710 	} while (!opt_state->done);
1711 }
1712 
1713 /*
1714  * Optimize the filter code in its dag representation.
1715  */
1716 void
1717 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1718 {
1719 	opt_state_t opt_state;
1720 
1721 	opt_init(cstate, &opt_state, ic);
1722 	opt_loop(cstate, &opt_state, ic, 0);
1723 	opt_loop(cstate, &opt_state, ic, 1);
1724 	intern_blocks(&opt_state, ic);
1725 #ifdef BDEBUG
1726 	if (pcap_optimizer_debug > 1) {
1727 		printf("after intern_blocks()\n");
1728 		opt_dump(cstate, ic);
1729 	}
1730 #endif
1731 	opt_root(&ic->root);
1732 #ifdef BDEBUG
1733 	if (pcap_optimizer_debug > 1) {
1734 		printf("after opt_root()\n");
1735 		opt_dump(cstate, ic);
1736 	}
1737 #endif
1738 	opt_cleanup(&opt_state);
1739 }
1740 
1741 static void
1742 make_marks(struct icode *ic, struct block *p)
1743 {
1744 	if (!isMarked(ic, p)) {
1745 		Mark(ic, p);
1746 		if (BPF_CLASS(p->s.code) != BPF_RET) {
1747 			make_marks(ic, JT(p));
1748 			make_marks(ic, JF(p));
1749 		}
1750 	}
1751 }
1752 
1753 /*
1754  * Mark code array such that isMarked(ic->cur_mark, i) is true
1755  * only for nodes that are alive.
1756  */
1757 static void
1758 mark_code(struct icode *ic)
1759 {
1760 	ic->cur_mark += 1;
1761 	make_marks(ic, ic->root);
1762 }
1763 
1764 /*
1765  * True iff the two stmt lists load the same value from the packet into
1766  * the accumulator.
1767  */
1768 static int
1769 eq_slist(struct slist *x, struct slist *y)
1770 {
1771 	while (1) {
1772 		while (x && x->s.code == NOP)
1773 			x = x->next;
1774 		while (y && y->s.code == NOP)
1775 			y = y->next;
1776 		if (x == 0)
1777 			return y == 0;
1778 		if (y == 0)
1779 			return x == 0;
1780 		if (x->s.code != y->s.code || x->s.k != y->s.k)
1781 			return 0;
1782 		x = x->next;
1783 		y = y->next;
1784 	}
1785 }
1786 
1787 static inline int
1788 eq_blk(struct block *b0, struct block *b1)
1789 {
1790 	if (b0->s.code == b1->s.code &&
1791 	    b0->s.k == b1->s.k &&
1792 	    b0->et.succ == b1->et.succ &&
1793 	    b0->ef.succ == b1->ef.succ)
1794 		return eq_slist(b0->stmts, b1->stmts);
1795 	return 0;
1796 }
1797 
1798 static void
1799 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1800 {
1801 	struct block *p;
1802 	int i, j;
1803 	int done1; /* don't shadow global */
1804  top:
1805 	done1 = 1;
1806 	for (i = 0; i < opt_state->n_blocks; ++i)
1807 		opt_state->blocks[i]->link = 0;
1808 
1809 	mark_code(ic);
1810 
1811 	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1812 		if (!isMarked(ic, opt_state->blocks[i]))
1813 			continue;
1814 		for (j = i + 1; j < opt_state->n_blocks; ++j) {
1815 			if (!isMarked(ic, opt_state->blocks[j]))
1816 				continue;
1817 			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1818 				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1819 					opt_state->blocks[j]->link : opt_state->blocks[j];
1820 				break;
1821 			}
1822 		}
1823 	}
1824 	for (i = 0; i < opt_state->n_blocks; ++i) {
1825 		p = opt_state->blocks[i];
1826 		if (JT(p) == 0)
1827 			continue;
1828 		if (JT(p)->link) {
1829 			done1 = 0;
1830 			JT(p) = JT(p)->link;
1831 		}
1832 		if (JF(p)->link) {
1833 			done1 = 0;
1834 			JF(p) = JF(p)->link;
1835 		}
1836 	}
1837 	if (!done1)
1838 		goto top;
1839 }
1840 
1841 static void
1842 opt_cleanup(opt_state_t *opt_state)
1843 {
1844 	free((void *)opt_state->vnode_base);
1845 	free((void *)opt_state->vmap);
1846 	free((void *)opt_state->edges);
1847 	free((void *)opt_state->space);
1848 	free((void *)opt_state->levels);
1849 	free((void *)opt_state->blocks);
1850 }
1851 
1852 /*
1853  * Return the number of stmts in 's'.
1854  */
1855 static u_int
1856 slength(struct slist *s)
1857 {
1858 	u_int n = 0;
1859 
1860 	for (; s; s = s->next)
1861 		if (s->s.code != NOP)
1862 			++n;
1863 	return n;
1864 }
1865 
1866 /*
1867  * Return the number of nodes reachable by 'p'.
1868  * All nodes should be initially unmarked.
1869  */
1870 static int
1871 count_blocks(struct icode *ic, struct block *p)
1872 {
1873 	if (p == 0 || isMarked(ic, p))
1874 		return 0;
1875 	Mark(ic, p);
1876 	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
1877 }
1878 
1879 /*
1880  * Do a depth first search on the flow graph, numbering the
1881  * the basic blocks, and entering them into the 'blocks' array.`
1882  */
1883 static void
1884 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
1885 {
1886 	int n;
1887 
1888 	if (p == 0 || isMarked(ic, p))
1889 		return;
1890 
1891 	Mark(ic, p);
1892 	n = opt_state->n_blocks++;
1893 	p->id = n;
1894 	opt_state->blocks[n] = p;
1895 
1896 	number_blks_r(opt_state, ic, JT(p));
1897 	number_blks_r(opt_state, ic, JF(p));
1898 }
1899 
1900 /*
1901  * Return the number of stmts in the flowgraph reachable by 'p'.
1902  * The nodes should be unmarked before calling.
1903  *
1904  * Note that "stmts" means "instructions", and that this includes
1905  *
1906  *	side-effect statements in 'p' (slength(p->stmts));
1907  *
1908  *	statements in the true branch from 'p' (count_stmts(JT(p)));
1909  *
1910  *	statements in the false branch from 'p' (count_stmts(JF(p)));
1911  *
1912  *	the conditional jump itself (1);
1913  *
1914  *	an extra long jump if the true branch requires it (p->longjt);
1915  *
1916  *	an extra long jump if the false branch requires it (p->longjf).
1917  */
1918 static u_int
1919 count_stmts(struct icode *ic, struct block *p)
1920 {
1921 	u_int n;
1922 
1923 	if (p == 0 || isMarked(ic, p))
1924 		return 0;
1925 	Mark(ic, p);
1926 	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
1927 	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1928 }
1929 
1930 /*
1931  * Allocate memory.  All allocation is done before optimization
1932  * is begun.  A linear bound on the size of all data structures is computed
1933  * from the total number of blocks and/or statements.
1934  */
1935 static void
1936 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
1937 {
1938 	bpf_u_int32 *p;
1939 	int i, n, max_stmts;
1940 
1941 	/*
1942 	 * First, count the blocks, so we can malloc an array to map
1943 	 * block number to block.  Then, put the blocks into the array.
1944 	 */
1945 	unMarkAll(ic);
1946 	n = count_blocks(ic, ic->root);
1947 	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
1948 	if (opt_state->blocks == NULL)
1949 		bpf_error(cstate, "malloc");
1950 	unMarkAll(ic);
1951 	opt_state->n_blocks = 0;
1952 	number_blks_r(opt_state, ic, ic->root);
1953 
1954 	opt_state->n_edges = 2 * opt_state->n_blocks;
1955 	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
1956 	if (opt_state->edges == NULL)
1957 		bpf_error(cstate, "malloc");
1958 
1959 	/*
1960 	 * The number of levels is bounded by the number of nodes.
1961 	 */
1962 	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
1963 	if (opt_state->levels == NULL)
1964 		bpf_error(cstate, "malloc");
1965 
1966 	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1967 	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1968 
1969 	/* XXX */
1970 	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
1971 				 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
1972 	if (opt_state->space == NULL)
1973 		bpf_error(cstate, "malloc");
1974 	p = opt_state->space;
1975 	opt_state->all_dom_sets = p;
1976 	for (i = 0; i < n; ++i) {
1977 		opt_state->blocks[i]->dom = p;
1978 		p += opt_state->nodewords;
1979 	}
1980 	opt_state->all_closure_sets = p;
1981 	for (i = 0; i < n; ++i) {
1982 		opt_state->blocks[i]->closure = p;
1983 		p += opt_state->nodewords;
1984 	}
1985 	opt_state->all_edge_sets = p;
1986 	for (i = 0; i < n; ++i) {
1987 		register struct block *b = opt_state->blocks[i];
1988 
1989 		b->et.edom = p;
1990 		p += opt_state->edgewords;
1991 		b->ef.edom = p;
1992 		p += opt_state->edgewords;
1993 		b->et.id = i;
1994 		opt_state->edges[i] = &b->et;
1995 		b->ef.id = opt_state->n_blocks + i;
1996 		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
1997 		b->et.pred = b;
1998 		b->ef.pred = b;
1999 	}
2000 	max_stmts = 0;
2001 	for (i = 0; i < n; ++i)
2002 		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2003 	/*
2004 	 * We allocate at most 3 value numbers per statement,
2005 	 * so this is an upper bound on the number of valnodes
2006 	 * we'll need.
2007 	 */
2008 	opt_state->maxval = 3 * max_stmts;
2009 	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2010 	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2011 	if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
2012 		bpf_error(cstate, "malloc");
2013 }
2014 
2015 /*
2016  * This is only used when supporting optimizer debugging.  It is
2017  * global state, so do *not* do more than one compile in parallel
2018  * and expect it to provide meaningful information.
2019  */
2020 #ifdef BDEBUG
2021 int bids[1000];
2022 #endif
2023 
2024 /*
2025  * Returns true if successful.  Returns false if a branch has
2026  * an offset that is too large.  If so, we have marked that
2027  * branch so that on a subsequent iteration, it will be treated
2028  * properly.
2029  */
2030 static int
2031 convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
2032     struct icode *ic, struct block *p)
2033 {
2034 	struct bpf_insn *dst;
2035 	struct slist *src;
2036 	u_int slen;
2037 	u_int off;
2038 	int extrajmps;		/* number of extra jumps inserted */
2039 	struct slist **offset = NULL;
2040 
2041 	if (p == 0 || isMarked(ic, p))
2042 		return (1);
2043 	Mark(ic, p);
2044 
2045 	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
2046 		return (0);
2047 	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
2048 		return (0);
2049 
2050 	slen = slength(p->stmts);
2051 	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2052 		/* inflate length by any extra jumps */
2053 
2054 	p->offset = (int)(dst - conv_state->fstart);
2055 
2056 	/* generate offset[] for convenience  */
2057 	if (slen) {
2058 		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2059 		if (!offset) {
2060 			bpf_error(cstate, "not enough core");
2061 			/*NOTREACHED*/
2062 		}
2063 	}
2064 	src = p->stmts;
2065 	for (off = 0; off < slen && src; off++) {
2066 #if 0
2067 		printf("off=%d src=%x\n", off, src);
2068 #endif
2069 		offset[off] = src;
2070 		src = src->next;
2071 	}
2072 
2073 	off = 0;
2074 	for (src = p->stmts; src; src = src->next) {
2075 		if (src->s.code == NOP)
2076 			continue;
2077 		dst->code = (u_short)src->s.code;
2078 		dst->k = src->s.k;
2079 
2080 		/* fill block-local relative jump */
2081 		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2082 #if 0
2083 			if (src->s.jt || src->s.jf) {
2084 				bpf_error(cstate, "illegal jmp destination");
2085 				/*NOTREACHED*/
2086 			}
2087 #endif
2088 			goto filled;
2089 		}
2090 		if (off == slen - 2)	/*???*/
2091 			goto filled;
2092 
2093 	    {
2094 		u_int i;
2095 		int jt, jf;
2096 static const char ljerr[] = "%s for block-local relative jump: off=%d";
2097 
2098 #if 0
2099 		printf("code=%x off=%d %x %x\n", src->s.code,
2100 			off, src->s.jt, src->s.jf);
2101 #endif
2102 
2103 		if (!src->s.jt || !src->s.jf) {
2104 			bpf_error(cstate, ljerr, "no jmp destination", off);
2105 			/*NOTREACHED*/
2106 		}
2107 
2108 		jt = jf = 0;
2109 		for (i = 0; i < slen; i++) {
2110 			if (offset[i] == src->s.jt) {
2111 				if (jt) {
2112 					bpf_error(cstate, ljerr, "multiple matches", off);
2113 					/*NOTREACHED*/
2114 				}
2115 
2116 				dst->jt = i - off - 1;
2117 				jt++;
2118 			}
2119 			if (offset[i] == src->s.jf) {
2120 				if (jf) {
2121 					bpf_error(cstate, ljerr, "multiple matches", off);
2122 					/*NOTREACHED*/
2123 				}
2124 				dst->jf = i - off - 1;
2125 				jf++;
2126 			}
2127 		}
2128 		if (!jt || !jf) {
2129 			bpf_error(cstate, ljerr, "no destination found", off);
2130 			/*NOTREACHED*/
2131 		}
2132 	    }
2133 filled:
2134 		++dst;
2135 		++off;
2136 	}
2137 	if (offset)
2138 		free(offset);
2139 
2140 #ifdef BDEBUG
2141 	bids[dst - conv_state->fstart] = p->id + 1;
2142 #endif
2143 	dst->code = (u_short)p->s.code;
2144 	dst->k = p->s.k;
2145 	if (JT(p)) {
2146 		extrajmps = 0;
2147 		off = JT(p)->offset - (p->offset + slen) - 1;
2148 		if (off >= 256) {
2149 		    /* offset too large for branch, must add a jump */
2150 		    if (p->longjt == 0) {
2151 		    	/* mark this instruction and retry */
2152 			p->longjt++;
2153 			return(0);
2154 		    }
2155 		    /* branch if T to following jump */
2156 		    dst->jt = extrajmps;
2157 		    extrajmps++;
2158 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
2159 		    dst[extrajmps].k = off - extrajmps;
2160 		}
2161 		else
2162 		    dst->jt = off;
2163 		off = JF(p)->offset - (p->offset + slen) - 1;
2164 		if (off >= 256) {
2165 		    /* offset too large for branch, must add a jump */
2166 		    if (p->longjf == 0) {
2167 		    	/* mark this instruction and retry */
2168 			p->longjf++;
2169 			return(0);
2170 		    }
2171 		    /* branch if F to following jump */
2172 		    /* if two jumps are inserted, F goes to second one */
2173 		    dst->jf = extrajmps;
2174 		    extrajmps++;
2175 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
2176 		    dst[extrajmps].k = off - extrajmps;
2177 		}
2178 		else
2179 		    dst->jf = off;
2180 	}
2181 	return (1);
2182 }
2183 
2184 
2185 /*
2186  * Convert flowgraph intermediate representation to the
2187  * BPF array representation.  Set *lenp to the number of instructions.
2188  *
2189  * This routine does *NOT* leak the memory pointed to by fp.  It *must
2190  * not* do free(fp) before returning fp; doing so would make no sense,
2191  * as the BPF array pointed to by the return value of icode_to_fcode()
2192  * must be valid - it's being returned for use in a bpf_program structure.
2193  *
2194  * If it appears that icode_to_fcode() is leaking, the problem is that
2195  * the program using pcap_compile() is failing to free the memory in
2196  * the BPF program when it's done - the leak is in the program, not in
2197  * the routine that happens to be allocating the memory.  (By analogy, if
2198  * a program calls fopen() without ever calling fclose() on the FILE *,
2199  * it will leak the FILE structure; the leak is not in fopen(), it's in
2200  * the program.)  Change the program to use pcap_freecode() when it's
2201  * done with the filter program.  See the pcap man page.
2202  */
2203 struct bpf_insn *
2204 icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
2205     struct block *root, u_int *lenp)
2206 {
2207 	u_int n;
2208 	struct bpf_insn *fp;
2209 	conv_state_t conv_state;
2210 
2211 	/*
2212 	 * Loop doing convert_code_r() until no branches remain
2213 	 * with too-large offsets.
2214 	 */
2215 	while (1) {
2216 	    unMarkAll(ic);
2217 	    n = *lenp = count_stmts(ic, root);
2218 
2219 	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2220 	    if (fp == NULL)
2221 		    bpf_error(cstate, "malloc");
2222 	    memset((char *)fp, 0, sizeof(*fp) * n);
2223 	    conv_state.fstart = fp;
2224 	    conv_state.ftail = fp + n;
2225 
2226 	    unMarkAll(ic);
2227 	    if (convert_code_r(cstate, &conv_state, ic, root))
2228 		break;
2229 	    free(fp);
2230 	}
2231 
2232 	return fp;
2233 }
2234 
2235 /*
2236  * Make a copy of a BPF program and put it in the "fcode" member of
2237  * a "pcap_t".
2238  *
2239  * If we fail to allocate memory for the copy, fill in the "errbuf"
2240  * member of the "pcap_t" with an error message, and return -1;
2241  * otherwise, return 0.
2242  */
2243 int
2244 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2245 {
2246 	size_t prog_size;
2247 
2248 	/*
2249 	 * Validate the program.
2250 	 */
2251 	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2252 		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2253 			"BPF program is not valid");
2254 		return (-1);
2255 	}
2256 
2257 	/*
2258 	 * Free up any already installed program.
2259 	 */
2260 	pcap_freecode(&p->fcode);
2261 
2262 	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2263 	p->fcode.bf_len = fp->bf_len;
2264 	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2265 	if (p->fcode.bf_insns == NULL) {
2266 		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2267 			 "malloc: %s", pcap_strerror(errno));
2268 		return (-1);
2269 	}
2270 	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2271 	return (0);
2272 }
2273 
2274 #ifdef BDEBUG
2275 static void
2276 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2277     FILE *out)
2278 {
2279 	int icount, noffset;
2280 	int i;
2281 
2282 	if (block == NULL || isMarked(ic, block))
2283 		return;
2284 	Mark(ic, block);
2285 
2286 	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2287 	noffset = min(block->offset + icount, (int)prog->bf_len);
2288 
2289 	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2290 	for (i = block->offset; i < noffset; i++) {
2291 		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2292 	}
2293 	fprintf(out, "\" tooltip=\"");
2294 	for (i = 0; i < BPF_MEMWORDS; i++)
2295 		if (block->val[i] != 0)
2296 			fprintf(out, "val[%d]=%d ", i, block->val[i]);
2297 	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2298 	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2299 	fprintf(out, "\"");
2300 	if (JT(block) == NULL)
2301 		fprintf(out, ", peripheries=2");
2302 	fprintf(out, "];\n");
2303 
2304 	dot_dump_node(ic, JT(block), prog, out);
2305 	dot_dump_node(ic, JF(block), prog, out);
2306 }
2307 
2308 static void
2309 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2310 {
2311 	if (block == NULL || isMarked(ic, block))
2312 		return;
2313 	Mark(ic, block);
2314 
2315 	if (JT(block)) {
2316 		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2317 				block->id, JT(block)->id);
2318 		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2319 			   block->id, JF(block)->id);
2320 	}
2321 	dot_dump_edge(ic, JT(block), out);
2322 	dot_dump_edge(ic, JF(block), out);
2323 }
2324 
2325 /* Output the block CFG using graphviz/DOT language
2326  * In the CFG, block's code, value index for each registers at EXIT,
2327  * and the jump relationship is show.
2328  *
2329  * example DOT for BPF `ip src host 1.1.1.1' is:
2330     digraph BPF {
2331     	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2	jf 5" tooltip="val[A]=0 val[X]=0"];
2332     	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4	jf 5" tooltip="val[A]=0 val[X]=0"];
2333     	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2334     	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2335     	"block0":se -> "block1":n [label="T"];
2336     	"block0":sw -> "block3":n [label="F"];
2337     	"block1":se -> "block2":n [label="T"];
2338     	"block1":sw -> "block3":n [label="F"];
2339     }
2340  *
2341  *  After install graphviz on http://www.graphviz.org/, save it as bpf.dot
2342  *  and run `dot -Tpng -O bpf.dot' to draw the graph.
2343  */
2344 static void
2345 dot_dump(compiler_state_t *cstate, struct icode *ic)
2346 {
2347 	struct bpf_program f;
2348 	FILE *out = stdout;
2349 
2350 	memset(bids, 0, sizeof bids);
2351 	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2352 
2353 	fprintf(out, "digraph BPF {\n");
2354 	ic->cur_mark = 0;
2355 	unMarkAll(ic);
2356 	dot_dump_node(ic, ic->root, &f, out);
2357 	ic->cur_mark = 0;
2358 	unMarkAll(ic);
2359 	dot_dump_edge(ic, ic->root, out);
2360 	fprintf(out, "}\n");
2361 
2362 	free((char *)f.bf_insns);
2363 }
2364 
2365 static void
2366 plain_dump(compiler_state_t *cstate, struct icode *ic)
2367 {
2368 	struct bpf_program f;
2369 
2370 	memset(bids, 0, sizeof bids);
2371 	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2372 	bpf_dump(&f, 1);
2373 	putchar('\n');
2374 	free((char *)f.bf_insns);
2375 }
2376 
2377 static void
2378 opt_dump(compiler_state_t *cstate, struct icode *ic)
2379 {
2380 	/* if optimizer debugging is enabled, output DOT graph
2381 	 * `pcap_optimizer_debug=4' is equivalent to -dddd to follow -d/-dd/-ddd
2382 	 * convention in tcpdump command line
2383 	 */
2384 	if (pcap_optimizer_debug > 3)
2385 		dot_dump(cstate, ic);
2386 	else
2387 		plain_dump(cstate, ic);
2388 }
2389 #endif
2390