xref: /netbsd-src/external/cddl/osnet/dev/fbt/fbt.c (revision aceb213538ec08a74028e213127af18aa17bf1cf)
1 /*	$NetBSD: fbt.c,v 1.12 2013/03/03 18:18:13 christos Exp $	*/
2 
3 /*
4  * CDDL HEADER START
5  *
6  * The contents of this file are subject to the terms of the
7  * Common Development and Distribution License (the "License").
8  * You may not use this file except in compliance with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  *
23  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
24  * Portions Copyright 2010 Darran Hunt darran@NetBSD.org
25  *
26  * $FreeBSD: src/sys/cddl/dev/fbt/fbt.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $
27  *
28  */
29 
30 /*
31  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
32  * Use is subject to license terms.
33  */
34 
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/conf.h>
39 #include <sys/cpuvar.h>
40 #include <sys/fcntl.h>
41 #include <sys/filio.h>
42 #include <sys/kernel.h>
43 #include <sys/kmem.h>
44 #include <sys/ksyms.h>
45 #include <sys/cpu.h>
46 #include <sys/kthread.h>
47 #include <sys/limits.h>
48 #include <sys/linker.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/poll.h>
54 #include <sys/proc.h>
55 #include <sys/selinfo.h>
56 #include <sys/syscall.h>
57 #include <sys/uio.h>
58 #include <sys/unistd.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cpufunc.h>
62 #include <machine/specialreg.h>
63 #if 0
64 #include <x86/cpuvar.h>
65 #endif
66 #include <x86/cputypes.h>
67 
68 #define ELFSIZE ARCH_ELFSIZE
69 #include <sys/exec_elf.h>
70 
71 #include <sys/dtrace.h>
72 #include <sys/dtrace_bsd.h>
73 #include <sys/kern_ctf.h>
74 #include <sys/dtrace_impl.h>
75 
mod_ctf_t *modptr;	/* CTF data of the module currently being processed */

MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

/*
 * x86/amd64 instruction bytes used to recognize function prologues and
 * epilogues while scanning a function body.
 */
#define	FBT_PUSHL_EBP		0x55	/* pushl %ebp / pushq %rbp */
#define	FBT_MOVL_ESP_EBP0_V0	0x8b	/* movl %esp,%ebp -- 8b ec encoding */
#define	FBT_MOVL_ESP_EBP1_V0	0xec
#define	FBT_MOVL_ESP_EBP0_V1	0x89	/* movl %esp,%ebp -- 89 e5 encoding */
#define	FBT_MOVL_ESP_EBP1_V1	0xe5
#define	FBT_REX_RSP_RBP		0x48	/* REX.W prefix (amd64) */

#define	FBT_POPL_EBP		0x5d	/* popl %ebp */
#define	FBT_RET			0xc3	/* ret */
#define	FBT_RET_IMM16		0xc2	/* ret imm16 */
#define	FBT_LEAVE		0xc9	/* leave */

/*
 * Byte written over the patch point to make it trap into dtrace_invop():
 * 0xcc (int3) on amd64, 0xf0 on i386 (presumably relying on an invalid
 * lock-prefix #UD -- confirm against the i386 trap handler).
 */
#ifdef __amd64__
#define	FBT_PATCHVAL		0xcc
#else
#define	FBT_PATCHVAL		0xf0
#endif

static dev_type_open(fbt_open);
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, dtrace_modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static int	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);

#define	FBT_ENTRY	"entry"
#define	FBT_RETURN	"return"
/* Hash a patch-point address into an fbt_probetab bucket. */
#define	FBT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
#define	FBT_PROBETAB_SIZE	0x8000		/* 32k entries -- 128K total */
113 
/* Character device switch: only open is implemented. */
static const struct cdevsw fbt_cdevsw = {
	fbt_open, noclose, noread, nowrite, noioctl,
	nostop, notty, nopoll, nommap, nokqfilter,
	D_OTHER
};

/* Stability attributes reported for the fbt provider. */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};

/* Provider operations vector handed to dtrace_register(). */
static dtrace_pops_t fbt_pops = {
	NULL,			/* dtps_provide */
	fbt_provide_module,	/* dtps_provide_module */
	fbt_enable,		/* dtps_enable */
	fbt_disable,		/* dtps_disable */
	fbt_suspend,		/* dtps_suspend */
	fbt_resume,		/* dtps_resume */
	fbt_getargdesc,		/* dtps_getargdesc */
	NULL,			/* dtps_getargval */
	NULL,			/* dtps_usermode */
	fbt_destroy		/* dtps_destroy */
};

/*
 * One fbt probe.  An entry probe is a single node; the return probes of
 * a function are chained through fbtp_next and share one probe id.
 */
typedef struct fbt_probe {
	struct fbt_probe *fbtp_hashnext;	/* fbt_probetab hash chain */
	uint8_t		*fbtp_patchpoint;	/* instruction byte we patch */
	int8_t		fbtp_rval;		/* DTRACE_INVOP_* emulation code */
	uint8_t		fbtp_patchval;		/* byte written when enabled */
	uint8_t		fbtp_savedval;		/* original byte, for restore */
	uintptr_t	fbtp_roffset;		/* return-site offset; 0 = entry */
	dtrace_id_t	fbtp_id;		/* DTrace probe id */
	const char	*fbtp_name;		/* function (symbol) name */
	dtrace_modctl_t	*fbtp_ctl;		/* owning module */
	int		fbtp_loadcnt;		/* module load count (unused yet) */
	int		fbtp_primary;
	int		fbtp_invop_cnt;		/* number of times fired */
	int		fbtp_symindx;		/* symbol index in the module */
	struct fbt_probe *fbtp_next;		/* next return probe of function */
} fbt_probe_t;

#ifdef notyet
static struct cdev		*fbt_cdev;
static int			fbt_verbose = 0;
#endif
static dtrace_provider_id_t	fbt_id;		/* from dtrace_register() */
static fbt_probe_t		**fbt_probetab;	/* patch-point hash table */
static int			fbt_probetab_size;
static int			fbt_probetab_mask;
166 
167 static void
168 fbt_doubletrap(void)
169 {
170 	fbt_probe_t *fbt;
171 	int i;
172 
173 	for (i = 0; i < fbt_probetab_size; i++) {
174 		fbt = fbt_probetab[i];
175 
176 		for (; fbt != NULL; fbt = fbt->fbtp_next)
177 			*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
178 	}
179 }
180 
181 
/*
 * dtrace_invop() handler for fbt: called from the trap path when a
 * patched (FBT_PATCHVAL) byte is executed.  If the faulting address is
 * one of our patch points, fire the probe and return its DTRACE_INVOP_*
 * code so the trap handler can emulate the displaced instruction;
 * return 0 if the address is not an fbt probe.
 */
static int
fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
{
	solaris_cpu_t *cpu = &solaris_cpu[cpu_number()];
	uintptr_t stack0, stack1, stack2, stack3, stack4;
	fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];

	/* Walk the hash chain of the bucket this address falls in. */
	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
		if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
			fbt->fbtp_invop_cnt++;
			if (fbt->fbtp_roffset == 0) {
				/* roffset == 0: this is an entry probe. */
				int i = 0;
				/*
				 * When accessing the arguments on the stack,
				 * we must protect against accessing beyond
				 * the stack.  We can safely set NOFAULT here
				 * -- we know that interrupts are already
				 * disabled.
				 */
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				/* First word is recorded as the caller. */
				cpu->cpu_dtrace_caller = stack[i++];
				stack0 = stack[i++];
				stack1 = stack[i++];
				stack2 = stack[i++];
				stack3 = stack[i++];
				stack4 = stack[i++];
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
				    CPU_DTRACE_BADADDR);

				dtrace_probe(fbt->fbtp_id, stack0, stack1,
				    stack2, stack3, stack4);

				cpu->cpu_dtrace_caller = 0;
			} else {
#ifdef __amd64__
				/*
				 * On amd64, we instrument the ret, not the
				 * leave.  We therefore need to set the caller
				 * to assure that the top frame of a stack()
				 * action is correct.
				 */
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				cpu->cpu_dtrace_caller = stack[0];
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
				    CPU_DTRACE_BADADDR);
#endif

				/* Return probe: arg0 = offset, arg1 = rval. */
				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset,
				    rval, 0, 0, 0);
				cpu->cpu_dtrace_caller = 0;
			}

			return (fbt->fbtp_rval);
		}
	}

	return (0);
}
240 
241 static int
242 fbt_provide_module_cb(const char *name, int symindx, void *value,
243 	uint32_t symsize, int type, void *opaque)
244 {
245 	fbt_probe_t *fbt, *retfbt;
246 	u_int8_t *instr, *limit;
247 	dtrace_modctl_t *mod = opaque;
248 	const char *modname = mod->mod_info->mi_name;
249 	int j;
250 	int size;
251 
252 	/* got a function? */
253 	if (ELF_ST_TYPE(type) != STT_FUNC) {
254 	    return 0;
255 	}
256 
257 	if (strncmp(name, "dtrace_", 7) == 0 &&
258 	    strncmp(name, "dtrace_safe_", 12) != 0) {
259 		/*
260 		 * Anything beginning with "dtrace_" may be called
261 		 * from probe context unless it explicitly indicates
262 		 * that it won't be called from probe context by
263 		 * using the prefix "dtrace_safe_".
264 		 */
265 		return (0);
266 	}
267 
268 	if (name[0] == '_' && name[1] == '_')
269 		return (0);
270 
271 	/*
272 	 * Exclude some more symbols which can be called from probe context.
273 	 */
274 	if (strcmp(name, "x86_curcpu") == 0 /* CPU */
275 	    || strcmp(name, "x86_curlwp") == 0 /* curproc, curlwp, curthread */
276 	    || strcmp(name, "cpu_index") == 0 /* cpu_number, curcpu_id */
277 	    || strncmp(name, "db_", 3) == 0 /* debugger */
278 	    || strncmp(name, "ddb_", 4) == 0 /* debugger */
279 	    || strncmp(name, "kdb_", 4) == 0 /* debugger */
280 	    || strncmp(name, "lockdebug_", 10) == 0 /* lockdebug XXX for now */
281 	    || strncmp(name, "kauth_", 5) == 0 /* CRED XXX for now */
282 	    ) {
283 		return 0;
284 	}
285 
286 	instr = (u_int8_t *) value;
287 	limit = (u_int8_t *) value + symsize;
288 
289 #ifdef __amd64__
290 	while (instr < limit) {
291 		if (*instr == FBT_PUSHL_EBP)
292 			break;
293 
294 		if ((size = dtrace_instr_size(instr)) <= 0)
295 			break;
296 
297 		instr += size;
298 	}
299 
300 	if (instr >= limit || *instr != FBT_PUSHL_EBP) {
301 		/*
302 		 * We either don't save the frame pointer in this
303 		 * function, or we ran into some disassembly
304 		 * screw-up.  Either way, we bail.
305 		 */
306 		return (0);
307 	}
308 #else
309 	if (instr[0] != FBT_PUSHL_EBP) {
310 		return (0);
311 	}
312 
313 	if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 &&
314 	    instr[2] == FBT_MOVL_ESP_EBP1_V0) &&
315 	    !(instr[1] == FBT_MOVL_ESP_EBP0_V1 &&
316 	    instr[2] == FBT_MOVL_ESP_EBP1_V1)) {
317 		return (0);
318 	}
319 #endif
320 	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
321 	fbt->fbtp_name = name;
322 	fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
323 	    name, FBT_ENTRY, 3, fbt);
324 	fbt->fbtp_patchpoint = instr;
325 	fbt->fbtp_ctl = mod;
326 	/* fbt->fbtp_loadcnt = lf->loadcnt; */
327 	fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP;
328 	fbt->fbtp_savedval = *instr;
329 	fbt->fbtp_patchval = FBT_PATCHVAL;
330 	fbt->fbtp_symindx = symindx;
331 
332 	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
333 	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
334 	mod->mod_fbtentries++;
335 
336 	retfbt = NULL;
337 
338 	while (instr < limit) {
339 		if (instr >= limit)
340 			return (0);
341 
342 		/*
343 		 * If this disassembly fails, then we've likely walked off into
344 		 * a jump table or some other unsuitable area.  Bail out of the
345 		 * disassembly now.
346 		 */
347 		if ((size = dtrace_instr_size(instr)) <= 0)
348 			return (0);
349 
350 #ifdef __amd64__
351 		/*
352 		 * We only instrument "ret" on amd64 -- we don't yet instrument
353 		 * ret imm16, largely because the compiler doesn't seem to
354 		 * (yet) emit them in the kernel...
355 		 */
356 		if (*instr != FBT_RET) {
357 			instr += size;
358 			continue;
359 		}
360 #else
361 		if (!(size == 1 &&
362 		    (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) &&
363 		    (*(instr + 1) == FBT_RET ||
364 		    *(instr + 1) == FBT_RET_IMM16))) {
365 			instr += size;
366 			continue;
367 		}
368 #endif
369 
370 		/*
371 		 * We (desperately) want to avoid erroneously instrumenting a
372 		 * jump table, especially given that our markers are pretty
373 		 * short:  two bytes on x86, and just one byte on amd64.  To
374 		 * determine if we're looking at a true instruction sequence
375 		 * or an inline jump table that happens to contain the same
376 		 * byte sequences, we resort to some heuristic sleeze:  we
377 		 * treat this instruction as being contained within a pointer,
378 		 * and see if that pointer points to within the body of the
379 		 * function.  If it does, we refuse to instrument it.
380 		 */
381 		for (j = 0; j < sizeof (uintptr_t); j++) {
382 			caddr_t check = (caddr_t) instr - j;
383 			uint8_t *ptr;
384 
385 			if (check < (caddr_t)value)
386 				break;
387 
388 			if (check + sizeof (caddr_t) > (caddr_t)limit)
389 				continue;
390 
391 			ptr = *(uint8_t **)check;
392 
393 			if (ptr >= (uint8_t *) value && ptr < limit) {
394 				instr += size;
395 				continue;
396 			}
397 		}
398 
399 		/*
400 		 * We have a winner!
401 		 */
402 		fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
403 		fbt->fbtp_name = name;
404 
405 		if (retfbt == NULL) {
406 			fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
407 			    name, FBT_RETURN, 3, fbt);
408 		} else {
409 			retfbt->fbtp_next = fbt;
410 			fbt->fbtp_id = retfbt->fbtp_id;
411 		}
412 
413 		retfbt = fbt;
414 		fbt->fbtp_patchpoint = instr;
415 		fbt->fbtp_ctl = mod;
416 		/* fbt->fbtp_loadcnt = lf->loadcnt; */
417 		fbt->fbtp_symindx = symindx;
418 
419 #ifndef __amd64__
420 		if (*instr == FBT_POPL_EBP) {
421 			fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
422 		} else {
423 			ASSERT(*instr == FBT_LEAVE);
424 			fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
425 		}
426 		fbt->fbtp_roffset =
427 		    (uintptr_t)(instr - (uint8_t *) value) + 1;
428 
429 #else
430 		ASSERT(*instr == FBT_RET);
431 		fbt->fbtp_rval = DTRACE_INVOP_RET;
432 		fbt->fbtp_roffset =
433 		    (uintptr_t)(instr - (uint8_t *) value);
434 #endif
435 
436 		fbt->fbtp_savedval = *instr;
437 		fbt->fbtp_patchval = FBT_PATCHVAL;
438 		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
439 		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
440 
441 		mod->mod_fbtentries++;
442 
443 		instr += size;
444 	}
445 
446 	return 0;
447 }
448 
449 static void
450 fbt_provide_module(void *arg, dtrace_modctl_t *mod)
451 {
452 	char modname[MAXPATHLEN];
453 	int i;
454 	size_t len;
455 
456 	strlcpy(modname, mod->mod_info->mi_name, sizeof(modname));
457 	len = strlen(modname);
458 	if (len > 5 && strcmp(modname + len - 3, ".kmod") == 0)
459 		modname[len - 4] = '\0';
460 
461 	/*
462 	 * Employees of dtrace and their families are ineligible.  Void
463 	 * where prohibited.
464 	 */
465 	if (strcmp(modname, "dtrace") == 0)
466 		return;
467 
468 	/*
469 	 * The cyclic timer subsystem can be built as a module and DTrace
470 	 * depends on that, so it is ineligible too.
471 	 */
472 	if (strcmp(modname, "cyclic") == 0)
473 		return;
474 
475 	/*
476 	 * To register with DTrace, a module must list 'dtrace' as a
477 	 * dependency in order for the kernel linker to resolve
478 	 * symbols like dtrace_register(). All modules with such a
479 	 * dependency are ineligible for FBT tracing.
480 	 */
481 	for (i = 0; i < mod->mod_nrequired; i++) {
482 		if (strncmp(mod->mod_required[i]->mod_info->mi_name,
483 			    "dtrace", 6) == 0)
484 			return;
485 	}
486 
487 	if (mod->mod_fbtentries) {
488 		/*
489 		 * This module has some FBT entries allocated; we're afraid
490 		 * to screw with it.
491 		 */
492 		return;
493 	}
494 
495 	/*
496 	 * List the functions in the module and the symbol values.
497 	 */
498 	ksyms_mod_foreach(modname, fbt_provide_module_cb, mod);
499 }
500 
501 static void
502 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
503 {
504 	fbt_probe_t *fbt = parg, *next, *hash, *last;
505 	dtrace_modctl_t *ctl;
506 	int ndx;
507 
508 	do {
509 		ctl = fbt->fbtp_ctl;
510 
511 		ctl->mod_fbtentries--;
512 
513 		/*
514 		 * Now we need to remove this probe from the fbt_probetab.
515 		 */
516 		ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
517 		last = NULL;
518 		hash = fbt_probetab[ndx];
519 
520 		while (hash != fbt) {
521 			ASSERT(hash != NULL);
522 			last = hash;
523 			hash = hash->fbtp_hashnext;
524 		}
525 
526 		if (last != NULL) {
527 			last->fbtp_hashnext = fbt->fbtp_hashnext;
528 		} else {
529 			fbt_probetab[ndx] = fbt->fbtp_hashnext;
530 		}
531 
532 		next = fbt->fbtp_next;
533 		free(fbt, M_FBT);
534 
535 		fbt = next;
536 	} while (fbt != NULL);
537 }
538 
/*
 * Probe-enable callback: write FBT_PATCHVAL over the first byte of each
 * patch point in this probe's chain.  Interrupts are disabled and kernel
 * text write protection (CR0.WP) is temporarily cleared around the
 * write.  Always returns 0.
 */
static int
fbt_enable(void *arg, dtrace_id_t id, void *parg)
{
	fbt_probe_t *fbt = parg;
#if 0
	dtrace_modctl_t *ctl = fbt->fbtp_ctl;
#endif
	u_long psl;
	u_long cr0;


#if 0	/* XXX TBD */
	ctl->nenabled++;

	/*
	 * Now check that our modctl has the expected load count.  If it
	 * doesn't, this module must have been unloaded and reloaded -- and
	 * we're not going to touch it.
	 */
	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
		if (fbt_verbose) {
			printf("fbt is failing for probe %s "
			    "(module %s reloaded)",
			    fbt->fbtp_name, ctl->filename);
		}

		return;
	}
#endif

	/* Disable interrupts. */
	psl = x86_read_psl();
	x86_disable_intr();

	/* Disable write protection in supervisor mode. */
	cr0 = rcr0();
	lcr0(cr0 & ~CR0_WP);

	/* Patch every site in the chain (entry probe, or all return sites). */
	for (; fbt != NULL; fbt = fbt->fbtp_next) {
		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
	}

	/* Write back and invalidate cache, flush pipelines. */
	wbinvd();
	x86_flush();
	x86_write_psl(psl);

	/* Re-enable write protection. */
	lcr0(cr0);

	return 0;
}
591 
592 static void
593 fbt_disable(void *arg, dtrace_id_t id, void *parg)
594 {
595 	fbt_probe_t *fbt = parg;
596 #if 0
597 	dtrace_modctl_t *ctl = fbt->fbtp_ctl;
598 #endif
599 	u_long psl;
600 	u_long cr0;
601 
602 #if 0	/* XXX TBD */
603 	ASSERT(ctl->nenabled > 0);
604 	ctl->nenabled--;
605 
606 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
607 		return;
608 #endif
609 	/* Disable interrupts. */
610 	psl = x86_read_psl();
611 	x86_disable_intr();
612 
613 	/* Disable write protection in supervisor mode. */
614 	cr0 = rcr0();
615 	lcr0(cr0 & ~CR0_WP);
616 
617 	for (; fbt != NULL; fbt = fbt->fbtp_next)
618 		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
619 
620 	/* Write back and invalidate cache, flush pipelines. */
621 	wbinvd();
622 	x86_flush();
623 	x86_write_psl(psl);
624 
625 	/* Re-enable write protection. */
626 	lcr0(cr0);
627 }
628 
/*
 * Probe-suspend callback: temporarily restore the original byte at every
 * patch point (same text manipulation as fbt_disable(), but the probe
 * remains logically enabled and will be re-armed by fbt_resume()).
 */
static void
fbt_suspend(void *arg, dtrace_id_t id, void *parg)
{
	fbt_probe_t *fbt = parg;
#if 0
	dtrace_modctl_t *ctl = fbt->fbtp_ctl;
#endif
	u_long psl;
	u_long cr0;

#if 0	/* XXX TBD */
	ASSERT(ctl->nenabled > 0);

	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
		return;
#endif

	/* Disable interrupts. */
	psl = x86_read_psl();
	x86_disable_intr();

	/* Disable write protection in supervisor mode. */
	cr0 = rcr0();
	lcr0(cr0 & ~CR0_WP);

	/* Restore the original byte at every site in the chain. */
	for (; fbt != NULL; fbt = fbt->fbtp_next)
		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;

	/* Write back and invalidate cache, flush pipelines. */
	wbinvd();
	x86_flush();
	x86_write_psl(psl);

	/* Re-enable write protection. */
	lcr0(cr0);
}
665 
666 static void
667 fbt_resume(void *arg, dtrace_id_t id, void *parg)
668 {
669 	fbt_probe_t *fbt = parg;
670 #if 0
671 	dtrace_modctl_t *ctl = fbt->fbtp_ctl;
672 #endif
673 	u_long psl;
674 	u_long cr0;
675 
676 #if 0	/* XXX TBD */
677 	ASSERT(ctl->nenabled > 0);
678 
679 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
680 		return;
681 #endif
682 	/* Disable interrupts. */
683 	psl = x86_read_psl();
684 	x86_disable_intr();
685 
686 	/* Disable write protection in supervisor mode. */
687 	cr0 = rcr0();
688 	lcr0(cr0 & ~CR0_WP);
689 
690 	for (; fbt != NULL; fbt = fbt->fbtp_next)
691 		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
692 
693 	/* Write back and invalidate cache, flush pipelines. */
694 	wbinvd();
695 	x86_flush();
696 	x86_write_psl(psl);
697 
698 	/* Re-enable write protection. */
699 	lcr0(cr0);
700 }
701 
/*
 * Build mc->ctfoffp: for every symbol in the module's symbol table, the
 * offset into the CTF data of its type information, or 0xffffffff when
 * the symbol has none.  Returns 0 on success or an errno value.
 */
static int
fbt_ctfoff_init(dtrace_modctl_t *mod, mod_ctf_t *mc)
{
	const Elf_Sym *symp = mc->symtab;
	const char *name;
	const ctf_header_t *hp = (const ctf_header_t *) mc->ctftab;
	const uint8_t *ctfdata = mc->ctftab + sizeof(ctf_header_t);
	int i;
	uint32_t *ctfoff;
	uint32_t objtoff = hp->cth_objtoff;	/* cursor over object records */
	uint32_t funcoff = hp->cth_funcoff;	/* cursor over function records */
	ushort_t info;
	ushort_t vlen;
	/* With a symbol remap table (nmap) present, iterate its size. */
	int nsyms = (mc->nmap != NULL) ? mc->nmapsize : mc->nsym;

	/* Sanity check. */
	if (hp->cth_magic != CTF_MAGIC) {
		printf("Bad magic value in CTF data of '%s'\n",
			mod->mod_info->mi_name);
		return (EINVAL);
	}

	if (mc->symtab == NULL) {
		printf("No symbol table in '%s'\n",
			mod->mod_info->mi_name);
		return (EINVAL);
	}

	/* NOTE(review): M_WAITOK malloc does not fail; check kept as-is. */
	if ((ctfoff = malloc(sizeof(uint32_t) * nsyms, M_FBT, M_WAITOK)) == NULL)
		return (ENOMEM);

	mc->ctfoffp = ctfoff;

	for (i = 0; i < nsyms; i++, ctfoff++, symp++) {
	   	if (mc->nmap != NULL) {
			if (mc->nmap[i] == 0) {
				printf("%s.%d: Error! Got zero nmap!\n",
					__func__, __LINE__);
				continue;
			}

			/* CTF expects the pre-sorted symbol ordering,
			 * so map it from that to the current sorted
			 * and trimmed symbol table.
			 * ctfoff[new-ind] = oldind symbol info.
			 */

			/* map old index to new symbol table */
			symp = &mc->symtab[mc->nmap[i] - 1];

			/* map old index to new ctfoff index */
			ctfoff = &mc->ctfoffp[mc->nmap[i]-1];
		}

		/* Unnamed or undefined symbols carry no type info. */
		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
			*ctfoff = 0xffffffff;
			continue;
		}

		/* NOTE(review): name is computed but not otherwise used here. */
		if (symp->st_name < mc->strcnt)
			name = mc->strtab + symp->st_name;
		else
			name = "(?)";

		switch (ELF_ST_TYPE(symp->st_info)) {
		case STT_OBJECT:
			if (objtoff >= hp->cth_funcoff ||
                            (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
				*ctfoff = 0xffffffff;
                                break;
                        }

                        *ctfoff = objtoff;
                        objtoff += sizeof (ushort_t);
			break;

		case STT_FUNC:
			if (funcoff >= hp->cth_typeoff) {
				*ctfoff = 0xffffffff;
				break;
			}

			*ctfoff = funcoff;

			info = *((const ushort_t *)(ctfdata + funcoff));
			vlen = CTF_INFO_VLEN(info);

			/*
			 * If we encounter a zero pad at the end, just skip it.
			 * Otherwise skip over the function and its return type
			 * (+2) and the argument list (vlen).
			 */
			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
				funcoff += sizeof (ushort_t); /* skip pad */
			else
				funcoff += sizeof (ushort_t) * (vlen + 2);
			break;

		default:
			*ctfoff = 0xffffffff;
			break;
		}
	}

	return (0);
}
808 
809 static ssize_t
810 fbt_get_ctt_size(uint8_t xversion, const ctf_type_t *tp, ssize_t *sizep,
811     ssize_t *incrementp)
812 {
813 	ssize_t size, increment;
814 
815 	if (xversion > CTF_VERSION_1 &&
816 	    tp->ctt_size == CTF_LSIZE_SENT) {
817 		size = CTF_TYPE_LSIZE(tp);
818 		increment = sizeof (ctf_type_t);
819 	} else {
820 		size = tp->ctt_size;
821 		increment = sizeof (ctf_stype_t);
822 	}
823 
824 	if (sizep)
825 		*sizep = size;
826 	if (incrementp)
827 		*incrementp = increment;
828 
829 	return (size);
830 }
831 
832 static int
833 fbt_typoff_init(mod_ctf_t *mc)
834 {
835 	const ctf_header_t *hp = (const ctf_header_t *) mc->ctftab;
836 	const ctf_type_t *tbuf;
837 	const ctf_type_t *tend;
838 	const ctf_type_t *tp;
839 	const uint8_t *ctfdata = mc->ctftab + sizeof(ctf_header_t);
840 	int ctf_typemax = 0;
841 	uint32_t *xp;
842 	ulong_t pop[CTF_K_MAX + 1] = { 0 };
843 
844 	/* Sanity check. */
845 	if (hp->cth_magic != CTF_MAGIC)
846 		return (EINVAL);
847 
848 	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
849 	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
850 
851 	int child = hp->cth_parname != 0;
852 
853 	/*
854 	 * We make two passes through the entire type section.  In this first
855 	 * pass, we count the number of each type and the total number of types.
856 	 */
857 	for (tp = tbuf; tp < tend; ctf_typemax++) {
858 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
859 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
860 		ssize_t size, increment;
861 
862 		size_t vbytes;
863 		uint_t n;
864 
865 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
866 
867 		switch (kind) {
868 		case CTF_K_INTEGER:
869 		case CTF_K_FLOAT:
870 			vbytes = sizeof (uint_t);
871 			break;
872 		case CTF_K_ARRAY:
873 			vbytes = sizeof (ctf_array_t);
874 			break;
875 		case CTF_K_FUNCTION:
876 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
877 			break;
878 		case CTF_K_STRUCT:
879 		case CTF_K_UNION:
880 			if (size < CTF_LSTRUCT_THRESH) {
881 				ctf_member_t *mp = (ctf_member_t *)
882 				    ((uintptr_t)tp + increment);
883 
884 				vbytes = sizeof (ctf_member_t) * vlen;
885 				for (n = vlen; n != 0; n--, mp++)
886 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
887 			} else {
888 				ctf_lmember_t *lmp = (ctf_lmember_t *)
889 				    ((uintptr_t)tp + increment);
890 
891 				vbytes = sizeof (ctf_lmember_t) * vlen;
892 				for (n = vlen; n != 0; n--, lmp++)
893 					child |=
894 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
895 			}
896 			break;
897 		case CTF_K_ENUM:
898 			vbytes = sizeof (ctf_enum_t) * vlen;
899 			break;
900 		case CTF_K_FORWARD:
901 			/*
902 			 * For forward declarations, ctt_type is the CTF_K_*
903 			 * kind for the tag, so bump that population count too.
904 			 * If ctt_type is unknown, treat the tag as a struct.
905 			 */
906 			if (tp->ctt_type == CTF_K_UNKNOWN ||
907 			    tp->ctt_type >= CTF_K_MAX)
908 				pop[CTF_K_STRUCT]++;
909 			else
910 				pop[tp->ctt_type]++;
911 			/*FALLTHRU*/
912 		case CTF_K_UNKNOWN:
913 			vbytes = 0;
914 			break;
915 		case CTF_K_POINTER:
916 		case CTF_K_TYPEDEF:
917 		case CTF_K_VOLATILE:
918 		case CTF_K_CONST:
919 		case CTF_K_RESTRICT:
920 			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
921 			vbytes = 0;
922 			break;
923 		default:
924 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
925 			return (EIO);
926 		}
927 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
928 		pop[kind]++;
929 	}
930 
931 	mc->typlen = ctf_typemax;
932 
933 	if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_FBT, M_ZERO | M_WAITOK)) == NULL)
934 		return (ENOMEM);
935 
936 	mc->typoffp = xp;
937 
938 	/* type id 0 is used as a sentinel value */
939 	*xp++ = 0;
940 
941 	/*
942 	 * In the second pass, fill in the type offset.
943 	 */
944 	for (tp = tbuf; tp < tend; xp++) {
945 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
946 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
947 		ssize_t size, increment;
948 
949 		size_t vbytes;
950 		uint_t n;
951 
952 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
953 
954 		switch (kind) {
955 		case CTF_K_INTEGER:
956 		case CTF_K_FLOAT:
957 			vbytes = sizeof (uint_t);
958 			break;
959 		case CTF_K_ARRAY:
960 			vbytes = sizeof (ctf_array_t);
961 			break;
962 		case CTF_K_FUNCTION:
963 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
964 			break;
965 		case CTF_K_STRUCT:
966 		case CTF_K_UNION:
967 			if (size < CTF_LSTRUCT_THRESH) {
968 				ctf_member_t *mp = (ctf_member_t *)
969 				    ((uintptr_t)tp + increment);
970 
971 				vbytes = sizeof (ctf_member_t) * vlen;
972 				for (n = vlen; n != 0; n--, mp++)
973 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
974 			} else {
975 				ctf_lmember_t *lmp = (ctf_lmember_t *)
976 				    ((uintptr_t)tp + increment);
977 
978 				vbytes = sizeof (ctf_lmember_t) * vlen;
979 				for (n = vlen; n != 0; n--, lmp++)
980 					child |=
981 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
982 			}
983 			break;
984 		case CTF_K_ENUM:
985 			vbytes = sizeof (ctf_enum_t) * vlen;
986 			break;
987 		case CTF_K_FORWARD:
988 		case CTF_K_UNKNOWN:
989 			vbytes = 0;
990 			break;
991 		case CTF_K_POINTER:
992 		case CTF_K_TYPEDEF:
993 		case CTF_K_VOLATILE:
994 		case CTF_K_CONST:
995 		case CTF_K_RESTRICT:
996 			vbytes = 0;
997 			break;
998 		default:
999 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
1000 			return (EIO);
1001 		}
1002 		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
1003 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
1004 	}
1005 
1006 	return (0);
1007 }
1008 
1009 /*
1010  * CTF Declaration Stack
1011  *
1012  * In order to implement ctf_type_name(), we must convert a type graph back
1013  * into a C type declaration.  Unfortunately, a type graph represents a storage
1014  * class ordering of the type whereas a type declaration must obey the C rules
1015  * for operator precedence, and the two orderings are frequently in conflict.
1016  * For example, consider these CTF type graphs and their C declarations:
1017  *
1018  * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
1019  * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
1020  *
1021  * In each case, parentheses are used to raise operator * to higher lexical
1022  * precedence, so the string form of the C declaration cannot be constructed by
1023  * walking the type graph links and forming the string from left to right.
1024  *
1025  * The functions in this file build a set of stacks from the type graph nodes
1026  * corresponding to the C operator precedence levels in the appropriate order.
1027  * The code in ctf_type_name() can then iterate over the levels and nodes in
1028  * lexical precedence order and construct the final C declaration string.
1029  */
/* Embedded doubly-linked list node; see ctf_list_append()/_prepend(). */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))

/* C operator precedence levels used to order declaration output. */
typedef enum {
	CTF_PREC_BASE,
	CTF_PREC_POINTER,
	CTF_PREC_ARRAY,
	CTF_PREC_FUNCTION,
	CTF_PREC_MAX
} ctf_decl_prec_t;

/* One node on a declaration stack. */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;

/* State for converting a type graph into a C declaration string. */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* qualifier precision */
	ctf_decl_prec_t cd_ordp;		/* ordered precision */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;
1064 
1065 /*
1066  * Simple doubly-linked list append routine.  This implementation assumes that
1067  * each list element contains an embedded ctf_list_t as the first member.
1068  * An additional ctf_list_t is used to store the head (l_next) and tail
1069  * (l_prev) pointers.  The current head and tail list elements have their
1070  * previous and next pointers set to NULL, respectively.
1071  */
1072 static void
1073 ctf_list_append(ctf_list_t *lp, void *new)
1074 {
1075 	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
1076 	ctf_list_t *q = new;		/* q = new list element */
1077 
1078 	lp->l_prev = q;
1079 	q->l_prev = p;
1080 	q->l_next = NULL;
1081 
1082 	if (p != NULL)
1083 		p->l_next = q;
1084 	else
1085 		lp->l_next = q;
1086 }
1087 
1088 /*
1089  * Prepend the specified existing element to the given ctf_list_t.  The
1090  * existing pointer should be pointing at a struct with embedded ctf_list_t.
1091  */
1092 static void
1093 ctf_list_prepend(ctf_list_t *lp, void *new)
1094 {
1095 	ctf_list_t *p = new;		/* p = new list element */
1096 	ctf_list_t *q = lp->l_next;	/* q = head list element */
1097 
1098 	lp->l_next = p;
1099 	p->l_prev = NULL;
1100 	p->l_next = q;
1101 
1102 	if (q != NULL)
1103 		q->l_prev = p;
1104 	else
1105 		lp->l_prev = p;
1106 }
1107 
1108 static void
1109 ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
1110 {
1111 	int i;
1112 
1113 	bzero(cd, sizeof (ctf_decl_t));
1114 
1115 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
1116 		cd->cd_order[i] = CTF_PREC_BASE - 1;
1117 
1118 	cd->cd_qualp = CTF_PREC_BASE;
1119 	cd->cd_ordp = CTF_PREC_BASE;
1120 
1121 	cd->cd_buf = buf;
1122 	cd->cd_ptr = buf;
1123 	cd->cd_end = buf + len;
1124 }
1125 
1126 static void
1127 ctf_decl_fini(ctf_decl_t *cd)
1128 {
1129 	ctf_decl_node_t *cdp, *ndp;
1130 	int i;
1131 
1132 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
1133 		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
1134 		    cdp != NULL; cdp = ndp) {
1135 			ndp = ctf_list_next(cdp);
1136 			free(cdp, M_FBT);
1137 		}
1138 	}
1139 }
1140 
1141 static const ctf_type_t *
1142 ctf_lookup_by_id(mod_ctf_t *mc, ctf_id_t type)
1143 {
1144 	const ctf_type_t *tp;
1145 	uint32_t offset;
1146 	uint32_t *typoff = mc->typoffp;
1147 
1148 	if (type >= mc->typlen) {
1149 		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,mc->typlen);
1150 		return(NULL);
1151 	}
1152 
1153 	/* Check if the type isn't cross-referenced. */
1154 	if ((offset = typoff[type]) == 0) {
1155 		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
1156 		return(NULL);
1157 	}
1158 
1159 	tp = (const ctf_type_t *)(mc->ctftab + offset + sizeof(ctf_header_t));
1160 
1161 	return (tp);
1162 }
1163 
1164 static void
1165 fbt_array_info(mod_ctf_t *mc, ctf_id_t type, ctf_arinfo_t *arp)
1166 {
1167 	const ctf_header_t *hp = (const ctf_header_t *) mc->ctftab;
1168 	const ctf_type_t *tp;
1169 	const ctf_array_t *ap;
1170 	ssize_t increment;
1171 
1172 	bzero(arp, sizeof(*arp));
1173 
1174 	if ((tp = ctf_lookup_by_id(mc, type)) == NULL)
1175 		return;
1176 
1177 	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
1178 		return;
1179 
1180 	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
1181 
1182 	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
1183 	arp->ctr_contents = ap->cta_contents;
1184 	arp->ctr_index = ap->cta_index;
1185 	arp->ctr_nelems = ap->cta_nelems;
1186 }
1187 
1188 static const char *
1189 ctf_strptr(mod_ctf_t *mc, int name)
1190 {
1191 	const ctf_header_t *hp = (const ctf_header_t *) mc->ctftab;;
1192 	const char *strp = "";
1193 
1194 	if (name < 0 || name >= hp->cth_strlen)
1195 		return(strp);
1196 
1197 	strp = (const char *)(mc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
1198 
1199 	return (strp);
1200 }
1201 
/*
 * Push declaration nodes for 'type' onto the appropriate precedence
 * stacks in cd, recursing through arrays, typedefs, pointers, functions
 * and qualifiers until a base type is reached.  On lookup failure
 * cd->cd_err is set and the stacks may be left partially built; the
 * caller is expected to check cd_err and call ctf_decl_fini().
 */
static void
ctf_decl_push(ctf_decl_t *cd, mod_ctf_t *mc, ctf_id_t type)
{
	ctf_decl_node_t *cdp;
	ctf_decl_prec_t prec;
	uint_t kind, n = 1;		/* n: array element count, 1 otherwise */
	int is_qual = 0;		/* set for const/volatile/restrict */

	const ctf_type_t *tp;
	ctf_arinfo_t ar;

	if ((tp = ctf_lookup_by_id(mc, type)) == NULL) {
		cd->cd_err = ENOENT;
		return;
	}

	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
	case CTF_K_ARRAY:
		/* Recurse on the element type, then record this array. */
		fbt_array_info(mc, type, &ar);
		ctf_decl_push(cd, mc, ar.ctr_contents);
		n = ar.ctr_nelems;
		prec = CTF_PREC_ARRAY;
		break;

	case CTF_K_TYPEDEF:
		/* Anonymous typedefs are skipped in favor of their target. */
		if (ctf_strptr(mc, tp->ctt_name)[0] == '\0') {
			ctf_decl_push(cd, mc, tp->ctt_type);
			return;
		}
		prec = CTF_PREC_BASE;
		break;

	case CTF_K_FUNCTION:
		/* Recurse on the return type first. */
		ctf_decl_push(cd, mc, tp->ctt_type);
		prec = CTF_PREC_FUNCTION;
		break;

	case CTF_K_POINTER:
		/* Recurse on the referenced type first. */
		ctf_decl_push(cd, mc, tp->ctt_type);
		prec = CTF_PREC_POINTER;
		break;

	case CTF_K_VOLATILE:
	case CTF_K_CONST:
	case CTF_K_RESTRICT:
		/* Qualifiers attach at the current qualifier precedence. */
		ctf_decl_push(cd, mc, tp->ctt_type);
		prec = cd->cd_qualp;
		is_qual++;
		break;

	default:
		prec = CTF_PREC_BASE;
	}

	/*
	 * NOTE(review): with M_WAITOK this allocation presumably cannot
	 * fail; the NULL check is kept as defensive belt-and-braces.
	 */
	if ((cdp = malloc(sizeof (ctf_decl_node_t), M_FBT, M_WAITOK)) == NULL) {
		cd->cd_err = EAGAIN;
		return;
	}

	cdp->cd_type = type;
	cdp->cd_kind = kind;
	cdp->cd_n = n;

	/* First node at this precedence: record its discovery order. */
	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
		cd->cd_order[prec] = cd->cd_ordp++;

	/*
	 * Reset cd_qualp to the highest precedence level that we've seen so
	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
	 */
	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
		cd->cd_qualp = prec;

	/*
	 * C array declarators are ordered inside out so prepend them.  Also by
	 * convention qualifiers of base types precede the type specifier (e.g.
	 * const int vs. int const) even though the two forms are equivalent.
	 */
	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
	else
		ctf_list_append(&cd->cd_nodes[prec], cdp);
}
1285 
1286 static void
1287 ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
1288 {
1289 	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
1290 	va_list ap;
1291 	size_t n;
1292 
1293 	va_start(ap, format);
1294 	n = vsnprintf(cd->cd_ptr, len, format, ap);
1295 	va_end(ap);
1296 
1297 	cd->cd_ptr += MIN(n, len);
1298 	cd->cd_len += n;
1299 }
1300 
/*
 * Render CTF type id 'type' as a C declaration string into buf/len.
 * Returns the length the full declaration required (which may exceed
 * len, indicating truncation), or -1 on error.  Mirrors libctf's
 * ctf_type_name() logic using the decl node stacks built by
 * ctf_decl_push().
 */
static ssize_t
fbt_type_name(mod_ctf_t *mc, ctf_id_t type, char *buf, size_t len)
{
	ctf_decl_t cd;
	ctf_decl_node_t *cdp;
	ctf_decl_prec_t prec, lp, rp;
	int ptr, arr;
	uint_t k;

	if (mc == NULL && type == CTF_ERR)
		return (-1); /* simplify caller code by permitting CTF_ERR */

	ctf_decl_init(&cd, buf, len);
	ctf_decl_push(&cd, mc, type);

	if (cd.cd_err != 0) {
		ctf_decl_fini(&cd);
		return (-1);
	}

	/*
	 * If the type graph's order conflicts with lexical precedence order
	 * for pointers or arrays, then we need to surround the declarations at
	 * the corresponding lexical precedence with parentheses.  This can
	 * result in either a parenthesized pointer (*) as in int (*)() or
	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
	 */
	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;

	/* lp/rp: precedence levels at which to emit "(" and ")". */
	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;

	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */

	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
		    cdp != NULL; cdp = ctf_list_next(cdp)) {

			/*
			 * NOTE(review): ctf_lookup_by_id() can return NULL;
			 * tp is presumed valid here because ctf_decl_push()
			 * only queued resolvable ids -- confirm.
			 */
			const ctf_type_t *tp =
			    ctf_lookup_by_id(mc, cdp->cd_type);
			const char *name = ctf_strptr(mc, tp->ctt_name);

			/* Separate tokens with a space, except after * or []. */
			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
				ctf_decl_sprintf(&cd, " ");

			if (lp == prec) {
				ctf_decl_sprintf(&cd, "(");
				lp = -1;
			}

			switch (cdp->cd_kind) {
			case CTF_K_INTEGER:
			case CTF_K_FLOAT:
			case CTF_K_TYPEDEF:
				ctf_decl_sprintf(&cd, "%s", name);
				break;
			case CTF_K_POINTER:
				ctf_decl_sprintf(&cd, "*");
				break;
			case CTF_K_ARRAY:
				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
				break;
			case CTF_K_FUNCTION:
				ctf_decl_sprintf(&cd, "()");
				break;
			case CTF_K_STRUCT:
			case CTF_K_FORWARD:
				ctf_decl_sprintf(&cd, "struct %s", name);
				break;
			case CTF_K_UNION:
				ctf_decl_sprintf(&cd, "union %s", name);
				break;
			case CTF_K_ENUM:
				ctf_decl_sprintf(&cd, "enum %s", name);
				break;
			case CTF_K_VOLATILE:
				ctf_decl_sprintf(&cd, "volatile");
				break;
			case CTF_K_CONST:
				ctf_decl_sprintf(&cd, "const");
				break;
			case CTF_K_RESTRICT:
				ctf_decl_sprintf(&cd, "restrict");
				break;
			}

			k = cdp->cd_kind;
		}

		if (rp == prec)
			ctf_decl_sprintf(&cd, ")");
	}

	ctf_decl_fini(&cd);
	return (cd.cd_len);
}
1398 
/*
 * DTrace provider getargdesc entry point: describe argument 'ndx' of the
 * probed function by looking up its type in the module's CTF function
 * info and formatting the type name into desc->dtargd_native.  On any
 * failure desc->dtargd_ndx is left as DTRACE_ARGNONE.
 */
static void
fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
{
	const ushort_t *dp;
	fbt_probe_t *fbt = parg;
	mod_ctf_t mc;
	dtrace_modctl_t *ctl = fbt->fbtp_ctl;
	int ndx = desc->dtargd_ndx;
	int symindx = fbt->fbtp_symindx;
	uint32_t *ctfoff;
	uint32_t offset;
	ushort_t info, kind, n;
	int nsyms;

	/* Default to "no such argument" until proven otherwise. */
	desc->dtargd_ndx = DTRACE_ARGNONE;

	/* Get a pointer to the CTF data and it's length. */
	if (mod_ctf_get(ctl, &mc) != 0) {
		/* Warn only once per boot, not per probe lookup. */
		static int report=0;
		if (report < 1) {
		    report++;
		    printf("FBT: Error no CTF section found in module \"%s\"\n",
			    ctl->mod_info->mi_name);
		}
		/* No CTF data? Something wrong? *shrug* */
		return;
	}

	/* Symbol count differs when a symbol map is present. */
	nsyms = (mc.nmap != NULL) ? mc.nmapsize : mc.nsym;

	/* Check if this module hasn't been initialised yet. */
	if (mc.ctfoffp == NULL) {
		/*
		 * Initialise the CTF object and function symindx to
		 * byte offset array.
		 */
		if (fbt_ctfoff_init(ctl, &mc) != 0) {
			return;
		}

		/* Initialise the CTF type to byte offset array. */
		if (fbt_typoff_init(&mc) != 0) {
			return;
		}
	}

	ctfoff = mc.ctfoffp;

	/* Either table may still be missing after init; bail quietly. */
	if (ctfoff == NULL || mc.typoffp == NULL) {
		return;
	}

	/* Check if the symbol index is out of range. */
	if (symindx >= nsyms)
		return;

	/* Check if the symbol isn't cross-referenced. */
	if ((offset = ctfoff[symindx]) == 0xffffffff)
		return;

	/* dp walks the function's CTF info record: info word, then types. */
	dp = (const ushort_t *)(mc.ctftab + offset + sizeof(ctf_header_t));

	info = *dp++;
	kind = CTF_INFO_KIND(info);
	n = CTF_INFO_VLEN(info);	/* argument count */

	if (kind == CTF_K_UNKNOWN && n == 0) {
		printf("%s(%d): Unknown function %s!\n",__func__,__LINE__,
		    fbt->fbtp_name);
		return;
	}

	if (kind != CTF_K_FUNCTION) {
		printf("%s(%d): Expected a function %s!\n",__func__,__LINE__,
		    fbt->fbtp_name);
		return;
	}

	/* Check if the requested argument doesn't exist. */
	if (ndx >= n)
		return;

	/* Skip the return type and arguments up to the one requested. */
	dp += ndx + 1;

	/* Only report the argument if its type name formatted cleanly. */
	if (fbt_type_name(&mc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0) {
		desc->dtargd_ndx = ndx;
	}

	return;
}
1490 
/*
 * Module load: allocate the probe hash table, install the trap hooks,
 * and register the "fbt" DTrace provider.
 */
static void
fbt_load(void)
{
	/* Default the probe table size if not specified. */
	if (fbt_probetab_size == 0)
		fbt_probetab_size = FBT_PROBETAB_SIZE;

	/*
	 * Choose the hash mask for the probe table.
	 * NOTE(review): this assumes fbt_probetab_size is a power of
	 * two; a non-power-of-two size would yield a broken mask.
	 */
	fbt_probetab_mask = fbt_probetab_size - 1;

	/* Allocate memory for the probe table. */
	fbt_probetab =
	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);

	dtrace_doubletrap_func = fbt_doubletrap;
	dtrace_invop_add(fbt_invop);

	/*
	 * NOTE(review): if registration fails, the invop hook and
	 * doubletrap handler remain installed -- confirm whether they
	 * should be rolled back here.
	 */
	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
		return;
}
1512 
1513 
1514 static int
1515 fbt_unload(void)
1516 {
1517 	int error = 0;
1518 
1519 	/* De-register the invalid opcode handler. */
1520 	dtrace_invop_remove(fbt_invop);
1521 
1522 	dtrace_doubletrap_func = NULL;
1523 
1524 	/* De-register this DTrace provider. */
1525 	if ((error = dtrace_unregister(fbt_id)) != 0)
1526 		return (error);
1527 
1528 	/* Free the probe table. */
1529 	free(fbt_probetab, M_FBT);
1530 	fbt_probetab = NULL;
1531 	fbt_probetab_mask = 0;
1532 
1533 	return (error);
1534 }
1535 
1536 
1537 static int
1538 fbt_modcmd(modcmd_t cmd, void *data)
1539 {
1540 	int bmajor = -1, cmajor = -1;
1541 
1542 	switch (cmd) {
1543 	case MODULE_CMD_INIT:
1544 		fbt_load();
1545 		return devsw_attach("fbt", NULL, &bmajor,
1546 		    &fbt_cdevsw, &cmajor);
1547 	case MODULE_CMD_FINI:
1548 		fbt_unload();
1549 		return devsw_detach(NULL, &fbt_cdevsw);
1550 	default:
1551 		return ENOTTY;
1552 	}
1553 }
1554 
1555 static int
1556 fbt_open(dev_t dev, int flags, int mode, struct lwp *l)
1557 {
1558 	return (0);
1559 }
1560 
1561 MODULE(MODULE_CLASS_MISC, fbt, "dtrace");
1562