/*	$OpenBSD: dt_dev.c,v 1.29 2024/01/02 16:32:48 bluhm Exp $ */

/*
 * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/device.h>
#include <sys/exec_elf.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/ptrace.h>

#include <dev/dt/dtvar.h>

/*
 * Number of frames to skip in stack traces.
 *
 * The number of frames required to execute dt(4) profiling code
 * depends on the probe, context, architecture and possibly the
 * compiler.
 *
 * Static probes (tracepoints) are executed in the context of the
 * current thread and only need to skip frames up to the recording
 * function.  For example the syscall provider:
 *
 *	dt_prov_syscall_entry+0x141
 *	syscall+0x205		<--- start here
 *	Xsyscall+0x128
 *
 * Probes executed in their own context, like the profile provider,
 * need to skip the frames of that context which are different for
 * every architecture.  For example the profile provider executed
 * from hardclock(9) on amd64:
 *
 *	dt_prov_profile_enter+0x6e
 *	hardclock+0x1a9
 *	lapic_clockintr+0x3f
 *	Xresume_lapic_ltimer+0x26
 *	acpicpu_idle+0x1d2	<--- start here
 *	sched_idle+0x225
 *	proc_trampoline+0x1c
 */
#if defined(__amd64__)
#define DT_FA_PROFILE	7
#define DT_FA_STATIC	2
#elif defined(__i386__)
#define DT_FA_PROFILE	8
#define DT_FA_STATIC	2
#elif defined(__macppc__)
#define DT_FA_PROFILE	7
#define DT_FA_STATIC	2
#elif defined(__octeon__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__powerpc64__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__sparc64__)
#define DT_FA_PROFILE	7
#define DT_FA_STATIC	1
#else
#define DT_FA_STATIC	0
#define DT_FA_PROFILE	0
#endif
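
/*
 * These constants are consumed by dt_pcb_ring_get() below:
 * stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE) skips the dt(4)
 * and interrupt frames so that, in the amd64 example above, acpicpu_idle()
 * is the first frame recorded.  Porting dt(4) to a new architecture means
 * counting those frames again and adding a DT_FA_PROFILE/DT_FA_STATIC
 * pair to the list above.
 */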

#define DT_EVTRING_SIZE	16	/* # of slots in per PCB event ring */

#define DPRINTF(x...) /* nothing */

/*
 * Descriptor associated with each program opening /dev/dt.  It is used
 * to keep track of enabled PCBs.
 *
 *  Locks used to protect struct members in this file:
 *	I	immutable after initialization
 *	m	per-softc mutex
 *	K	kernel lock
 */
struct dt_softc {
	SLIST_ENTRY(dt_softc)	 ds_next;	/* [K] descriptor list */
	int			 ds_unit;	/* [I] D_CLONE unique unit */
	pid_t			 ds_pid;	/* [I] PID of tracing program */

	struct mutex		 ds_mtx;

	struct dt_pcb_list	 ds_pcbs;	/* [K] list of enabled PCBs */
	struct dt_evt		*ds_bufqueue;	/* [K] copy evts to userland */
	size_t			 ds_bufqlen;	/* [K] length of the queue */
	int			 ds_recording;	/* [K] currently recording? */
	int			 ds_evtcnt;	/* [m] # of readable evts */

	/* Counters */
	uint64_t		 ds_readevt;	/* [m] # of events read */
	uint64_t		 ds_dropevt;	/* [m] # of events dropped */
};

SLIST_HEAD(, dt_softc) dtdev_list;	/* [K] list of open /dev/dt nodes */

/*
 * Probes are created during dt_attach() and never modified/freed during
 * the lifetime of the system.  That's why we consider them as [I]mmutable.
 */
unsigned int			dt_nprobes;	/* [I] # of probes available */
SIMPLEQ_HEAD(, dt_probe)	dt_probe_list;	/* [I] list of probes */

struct rwlock			dt_lock = RWLOCK_INITIALIZER("dtlk");
volatile uint32_t		dt_tracing = 0;	/* [K] # of processes tracing */

int allowdt;

void	dtattach(struct device *, struct device *, void *);
int	dtopen(dev_t, int, int, struct proc *);
int	dtclose(dev_t, int, int, struct proc *);
int	dtread(dev_t, struct uio *, int);
int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);

struct	dt_softc *dtlookup(int);

int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
int	dt_ioctl_get_args(struct dt_softc *, struct dtioc_arg *);
int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
int	dt_ioctl_record_start(struct dt_softc *);
void	dt_ioctl_record_stop(struct dt_softc *);
int	dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
int	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
int	dt_ioctl_get_auxbase(struct dt_softc *, struct dtioc_getaux *);

int	dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);

void
dtattach(struct device *parent, struct device *self, void *aux)
{
	SLIST_INIT(&dtdev_list);
	SIMPLEQ_INIT(&dt_probe_list);

	/* Init providers */
	dt_nprobes += dt_prov_profile_init();
	dt_nprobes += dt_prov_syscall_init();
	dt_nprobes += dt_prov_static_init();
#ifdef DDBPROF
	dt_nprobes += dt_prov_kprobe_init();
#endif
}

int
dtopen(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *sc;
	struct dt_evt *queue;
	size_t qlen;
	int unit = minor(dev);

	if (!allowdt)
		return EPERM;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
	if (sc == NULL)
		return ENOMEM;

	/*
	 * Enough space to empty 2 full rings of events in a single read.
	 */
	qlen = 2 * DT_EVTRING_SIZE;
	queue = mallocarray(qlen, sizeof(*queue), M_DEVBUF, M_WAITOK|M_CANFAIL);
	if (queue == NULL) {
		free(sc, M_DEVBUF, sizeof(*sc));
		return ENOMEM;
	}

	/* no sleep after this point */
	if (dtlookup(unit) != NULL) {
		free(queue, M_DEVBUF, qlen * sizeof(*queue));
		free(sc, M_DEVBUF, sizeof(*sc));
		return EBUSY;
	}

	sc->ds_unit = unit;
	sc->ds_pid = p->p_p->ps_pid;
	TAILQ_INIT(&sc->ds_pcbs);
	mtx_init(&sc->ds_mtx, IPL_HIGH);
	sc->ds_bufqlen = qlen;
	sc->ds_bufqueue = queue;
	sc->ds_evtcnt = 0;
	sc->ds_readevt = 0;
	sc->ds_dropevt = 0;

	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);

	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);

	return 0;
}

int
dtclose(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);

	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
	dt_ioctl_record_stop(sc);
	dt_pcb_purge(&sc->ds_pcbs);

	free(sc->ds_bufqueue, M_DEVBUF,
	    sc->ds_bufqlen * sizeof(*sc->ds_bufqueue));
	free(sc, M_DEVBUF, sizeof(*sc));

	return 0;
}

int
dtread(dev_t dev, struct uio *uio, int flags)
{
	struct dt_softc *sc;
	struct dt_evt *estq;
	struct dt_pcb *dp;
	int error = 0, unit = minor(dev);
	size_t qlen, count, read = 0;
	uint64_t dropped = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	count = howmany(uio->uio_resid, sizeof(struct dt_evt));
	if (count < 1)
		return (EMSGSIZE);

	while (!sc->ds_evtcnt) {
		sleep_setup(sc, PWAIT | PCATCH, "dtread");
		error = sleep_finish(0, !sc->ds_evtcnt);
		if (error == EINTR || error == ERESTART)
			break;
	}
	if (error)
		return error;

	estq = sc->ds_bufqueue;
	qlen = MIN(sc->ds_bufqlen, count);

	KERNEL_ASSERT_LOCKED();
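	/* Drain each enabled PCB ring in turn until the copy buffer is full. */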
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		count = dt_pcb_ring_copy(dp, estq, qlen, &dropped);
		read += count;
		estq += count; /* pointer arithmetic */
		qlen -= count;
		if (qlen == 0)
			break;
	}
	if (read > 0)
		uiomove(sc->ds_bufqueue, read * sizeof(struct dt_evt), uio);

	mtx_enter(&sc->ds_mtx);
	sc->ds_evtcnt -= read;
	sc->ds_readevt += read;
	sc->ds_dropevt += dropped;
	mtx_leave(&sc->ds_mtx);

	return 0;
}

int
dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);
	int on, error = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	switch (cmd) {
	case DTIOCGPLIST:
		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
	case DTIOCGARGS:
		return dt_ioctl_get_args(sc, (struct dtioc_arg *)addr);
	case DTIOCGSTATS:
		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
	case DTIOCRECORD:
	case DTIOCPRBENABLE:
	case DTIOCPRBDISABLE:
	case DTIOCGETAUXBASE:
		/* root only ioctl(2) */
		break;
	default:
		return ENOTTY;
	}

	if ((error = suser(p)) != 0)
		return error;

	switch (cmd) {
	case DTIOCRECORD:
		on = *(int *)addr;
		if (on)
			error = dt_ioctl_record_start(sc);
		else
			dt_ioctl_record_stop(sc);
		break;
	case DTIOCPRBENABLE:
		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
		break;
	case DTIOCPRBDISABLE:
		error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr);
		break;
	case DTIOCGETAUXBASE:
		error = dt_ioctl_get_auxbase(sc, (struct dtioc_getaux *)addr);
		break;
	default:
		KASSERT(0);
	}

	return error;
}
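
/*
 * Illustrative sketch of a userland consumer of this interface (not part
 * of the driver; error handling omitted, see dt(4) and usr.sbin/btrace
 * for the real thing).  It assumes kern.allowdt=1 so that dtopen() does
 * not return EPERM, and that at least one probe is registered:
 *
 *	struct dtioc_req dtrq = { 0 };
 *	struct dt_evt evt;
 *	int fd, on = 1, off = 0;
 *
 *	fd = open("/dev/dt", O_RDONLY);
 *	dtrq.dtrq_pbn = 1;			(first registered probe)
 *	dtrq.dtrq_evtflags = DTEVT_EXECNAME;	(record ps_comm)
 *	ioctl(fd, DTIOCPRBENABLE, &dtrq);	-> dt_ioctl_probe_enable()
 *	ioctl(fd, DTIOCRECORD, &on);		-> dt_ioctl_record_start()
 *	read(fd, &evt, sizeof(evt));		-> dtread(), one dt_evt
 *	ioctl(fd, DTIOCRECORD, &off);		-> dt_ioctl_record_stop()
 *	close(fd);				-> dtclose()
 */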

struct dt_softc *
dtlookup(int unit)
{
	struct dt_softc *sc;

	KERNEL_ASSERT_LOCKED();

	SLIST_FOREACH(sc, &dtdev_list, ds_next) {
		if (sc->ds_unit == unit)
			break;
	}

	return sc;
}

int
dtioc_req_isvalid(struct dtioc_req *dtrq)
{
	switch (dtrq->dtrq_filter.dtf_operand) {
	case DT_OP_NONE:
	case DT_OP_EQ:
	case DT_OP_NE:
		break;
	default:
		return 0;
	}

	switch (dtrq->dtrq_filter.dtf_variable) {
	case DT_FV_NONE:
	case DT_FV_PID:
	case DT_FV_TID:
		break;
	default:
		return 0;
	}

	return 1;
}

int
dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
{
	struct dtioc_probe_info info, *dtpi;
	struct dt_probe *dtp;
	size_t size;
	int error = 0;

	size = dtpr->dtpr_size;
	dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
	if (size == 0)
		return 0;

	dtpi = dtpr->dtpr_probes;
	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (size < sizeof(*dtpi)) {
			error = ENOSPC;
			break;
		}
		memset(&info, 0, sizeof(info));
		info.dtpi_pbn = dtp->dtp_pbn;
		info.dtpi_nargs = dtp->dtp_nargs;
		strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
		    sizeof(info.dtpi_prov));
		strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
		strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
		error = copyout(&info, dtpi, sizeof(*dtpi));
		if (error)
			break;
		size -= sizeof(*dtpi);
		dtpi++;
	}

	return error;
}

int
dt_ioctl_get_args(struct dt_softc *sc, struct dtioc_arg *dtar)
{
	struct dtioc_arg_info info, *dtai;
	struct dt_probe *dtp;
	size_t size, n, t;
	uint32_t pbn;
	int error = 0;

	pbn = dtar->dtar_pbn;
	if (pbn == 0 || pbn > dt_nprobes)
		return EINVAL;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (pbn == dtp->dtp_pbn)
			break;
	}
	if (dtp == NULL)
		return EINVAL;

	if (dtp->dtp_sysnum != 0) {
		/* currently not supported for system calls */
		dtar->dtar_size = 0;
		return 0;
	}

	size = dtar->dtar_size;
	dtar->dtar_size = dtp->dtp_nargs * sizeof(*dtai);
	if (size == 0)
		return 0;

	t = 0;
	dtai = dtar->dtar_args;
	for (n = 0; n < dtp->dtp_nargs; n++) {
		if (size < sizeof(*dtai)) {
			error = ENOSPC;
			break;
		}
		if (n >= DTMAXARGTYPES || dtp->dtp_argtype[n] == NULL)
			continue;
		memset(&info, 0, sizeof(info));
		info.dtai_pbn = dtp->dtp_pbn;
		info.dtai_argn = t++;
		strlcpy(info.dtai_argtype, dtp->dtp_argtype[n],
		    sizeof(info.dtai_argtype));
		error = copyout(&info, dtai, sizeof(*dtai));
		if (error)
			break;
		size -= sizeof(*dtai);
		dtai++;
	}
	dtar->dtar_size = t * sizeof(*dtai);

	return error;
}

int
dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
{
	mtx_enter(&sc->ds_mtx);
	dtst->dtst_readevt = sc->ds_readevt;
	dtst->dtst_dropevt = sc->ds_dropevt;
	mtx_leave(&sc->ds_mtx);

	return 0;
}

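/*
 * Start recording: link each enabled PCB into its probe's SMR list and
 * bump the per-probe and per-provider recording counters.  dt_tracing
 * counts the number of recording processes system-wide.
 */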
int
dt_ioctl_record_start(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	if (sc->ds_recording)
		return EBUSY;

	KERNEL_ASSERT_LOCKED();
	if (TAILQ_EMPTY(&sc->ds_pcbs))
		return ENOENT;

	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
		dtp->dtp_recording++;
		dtp->dtp_prov->dtpv_recording++;
	}
	rw_exit_write(&dt_lock);

	sc->ds_recording = 1;
	dt_tracing++;

	return 0;
}

void
dt_ioctl_record_stop(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	if (!sc->ds_recording)
		return;

	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);

	dt_tracing--;
	sc->ds_recording = 0;

	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		dtp->dtp_recording--;
		dtp->dtp_prov->dtpv_recording--;
		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
	}
	rw_exit_write(&dt_lock);

	/* Wait until readers cannot access the PCBs. */
	smr_barrier();
}

int
dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
	struct dt_pcb_list plist;
	struct dt_probe *dtp;
	int error;

	if (!dtioc_req_isvalid(dtrq))
		return EINVAL;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
			break;
	}
	if (dtp == NULL)
		return ENOENT;

	TAILQ_INIT(&plist);
	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
	if (error)
		return error;

	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);

	/* Append all PCBs to this instance */
	TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);

	return 0;
}

int
dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
	struct dt_probe *dtp;
	int error;

	if (!dtioc_req_isvalid(dtrq))
		return EINVAL;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
			break;
	}
	if (dtp == NULL)
		return ENOENT;

	if (dtp->dtp_prov->dtpv_dealloc) {
		error = dtp->dtp_prov->dtpv_dealloc(dtp, sc, dtrq);
		if (error)
			return error;
	}

	DPRINTF("dt%d: pid %d dealloc %u\n", sc->ds_unit, sc->ds_pid,
	    dtrq->dtrq_pbn);

	return 0;
}

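/*
 * Look up the process `dtga_pid' and return the AUX_base entry of its
 * ELF auxiliary vector, read from the traced process with
 * process_domem().  dtga_auxbase is left at zero if no AUX_base entry
 * is found.
 */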
int
dt_ioctl_get_auxbase(struct dt_softc *sc, struct dtioc_getaux *dtga)
{
	struct uio uio;
	struct iovec iov;
	struct process *pr;
	struct proc *p = curproc;
	AuxInfo auxv[ELF_AUX_ENTRIES];
	int i, error;

	dtga->dtga_auxbase = 0;

	if ((pr = prfind(dtga->dtga_pid)) == NULL)
		return ESRCH;

	iov.iov_base = auxv;
	iov.iov_len = sizeof(auxv);
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = pr->ps_auxinfo;
	uio.uio_resid = sizeof(auxv);
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_procp = p;
	uio.uio_rw = UIO_READ;

	error = process_domem(p, pr, &uio, PT_READ_D);
	if (error)
		return error;

	for (i = 0; i < ELF_AUX_ENTRIES; i++)
		if (auxv[i].au_id == AUX_base)
			dtga->dtga_auxbase = auxv[i].au_v;

	return 0;
}

struct dt_probe *
dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
{
	struct dt_probe *dtp;

	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
	if (dtp == NULL)
		return NULL;

	SMR_SLIST_INIT(&dtp->dtp_pcbs);
	dtp->dtp_prov = dtpv;
	dtp->dtp_func = func;
	dtp->dtp_name = name;
	dtp->dtp_sysnum = -1;
	dtp->dtp_ref = 0;

	return dtp;
}

void
dt_dev_register_probe(struct dt_probe *dtp)
{
	static uint64_t probe_nb;

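	/* Probe numbers are 1-based; pbn 0 never refers to a valid probe. */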
	dtp->dtp_pbn = ++probe_nb;
	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
}

struct dt_pcb *
dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
{
	struct dt_pcb *dp;

	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
	if (dp == NULL)
		goto bad;

	dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (dp->dp_ring == NULL)
		goto bad;

	mtx_init(&dp->dp_mtx, IPL_HIGH);
	dp->dp_sc = sc;
	dp->dp_dtp = dtp;
	return dp;
bad:
	dt_pcb_free(dp);
	return NULL;
}

void
dt_pcb_free(struct dt_pcb *dp)
{
	if (dp == NULL)
		return;
	free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
	free(dp, M_DT, sizeof(*dp));
}

void
dt_pcb_purge(struct dt_pcb_list *plist)
{
	struct dt_pcb *dp;

	while ((dp = TAILQ_FIRST(plist)) != NULL) {
		TAILQ_REMOVE(plist, dp, dp_snext);
		dt_pcb_free(dp);
	}
}

int
dt_pcb_filter(struct dt_pcb *dp)
{
	struct dt_filter *dtf = &dp->dp_filter;
	struct proc *p = curproc;
	unsigned int var = 0;
	int match = 1;

	/* Filter out tracing program. */
	if (dp->dp_sc->ds_pid == p->p_p->ps_pid)
		return 1;

	switch (dtf->dtf_variable) {
	case DT_FV_PID:
		var = p->p_p->ps_pid;
		break;
	case DT_FV_TID:
		var = p->p_tid + THREAD_PID_OFFSET;
		break;
	case DT_FV_NONE:
		break;
	default:
		KASSERT(0);
	}

	switch (dtf->dtf_operand) {
	case DT_OP_EQ:
		match = !!(var == dtf->dtf_value);
		break;
	case DT_OP_NE:
		match = !!(var != dtf->dtf_value);
		break;
	case DT_OP_NONE:
		break;
	default:
		KASSERT(0);
	}

	return !match;
}
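
/*
 * Illustration: a tracer that only wants events from pid 1234 sets up
 * its dtioc_req filter like this before DTIOCPRBENABLE:
 *
 *	dtrq.dtrq_filter.dtf_operand  = DT_OP_EQ;
 *	dtrq.dtrq_filter.dtf_variable = DT_FV_PID;
 *	dtrq.dtrq_filter.dtf_value    = 1234;
 *
 * dt_pcb_filter() above then drops every event that does not match.
 * Events generated by the tracing program itself are always dropped,
 * independently of the filter.
 */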


/*
 * Get a reference to the next free event state from the ring.
 */
struct dt_evt *
dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
{
	struct proc *p = curproc;
	struct dt_evt *dtev;
	int distance;

	if (dt_pcb_filter(dp))
		return NULL;

	mtx_enter(&dp->dp_mtx);
	distance = dp->dp_prod - dp->dp_cons;
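	/*
	 * One slot is always left unused: if recording at dp_cons would
	 * make it catch up with dp_prod (a distance of 1, or of
	 * 1 - DT_EVTRING_SIZE once the indices wrap), a full ring would be
	 * indistinguishable from an empty one, so the event is dropped.
	 */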
	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
		/* read(2) isn't finished */
		dp->dp_dropevt++;
		mtx_leave(&dp->dp_mtx);
		return NULL;
	}

	/*
	 * Save states in next free event slot.
	 */
	dtev = &dp->dp_ring[dp->dp_cons];
	memset(dtev, 0, sizeof(*dtev));

	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
	dtev->dtev_cpu = cpu_number();
	dtev->dtev_pid = p->p_p->ps_pid;
	dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET;
	nanotime(&dtev->dtev_tsp);

	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
		strlcpy(dtev->dtev_comm, p->p_p->ps_comm, sizeof(dtev->dtev_comm));

	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK)) {
		if (profiling)
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
		else
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
	}
	if (ISSET(dp->dp_evtflags, DTEVT_USTACK))
		stacktrace_save_utrace(&dtev->dtev_ustack);

	return dtev;
}

void
dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
{
	MUTEX_ASSERT_LOCKED(&dp->dp_mtx);
	KASSERT(dtev == &dp->dp_ring[dp->dp_cons]);

	dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE;
	mtx_leave(&dp->dp_mtx);

	mtx_enter(&dp->dp_sc->ds_mtx);
	dp->dp_sc->ds_evtcnt++;
	mtx_leave(&dp->dp_sc->ds_mtx);
	wakeup(dp->dp_sc);
}

/*
 * Copy at most `qlen' events from `dp', producing the same amount
 * of free slots.
 */
int
dt_pcb_ring_copy(struct dt_pcb *dp, struct dt_evt *estq, size_t qlen,
    uint64_t *dropped)
{
	size_t count, copied = 0;
	unsigned int cons, prod;

	KASSERT(qlen > 0);

	mtx_enter(&dp->dp_mtx);
	cons = dp->dp_cons;
	prod = dp->dp_prod;

	if (cons < prod)
		count = DT_EVTRING_SIZE - prod;
	else
		count = cons - prod;

	if (count == 0)
		goto out;

	*dropped += dp->dp_dropevt;
	dp->dp_dropevt = 0;

	count = MIN(count, qlen);

	memcpy(&estq[0], &dp->dp_ring[prod], count * sizeof(*estq));
	copied += count;

	/* Produce */
	prod = (prod + count) % DT_EVTRING_SIZE;

	/* If the queue is full or the ring didn't wrap, stop here. */
	if (qlen == copied || prod != 0 || cons == 0)
		goto out;

	count = MIN(cons, (qlen - copied));
	memcpy(&estq[copied], &dp->dp_ring[0], count * sizeof(*estq));
	copied += count;
	prod += count;

out:
	dp->dp_prod = prod;
	mtx_leave(&dp->dp_mtx);
	return copied;
}
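
/*
 * Worked example of the wrap-around handling above (illustrative): with
 * DT_EVTRING_SIZE == 16, dp_prod == 14 and dp_cons == 3, the unread
 * events sit in slots 14, 15, 0, 1 and 2.  The first memcpy() copies
 * slots 14-15 (DT_EVTRING_SIZE - prod == 2 events) and prod wraps to 0;
 * if qlen still has room, the second memcpy() copies slots 0-2
 * (MIN(cons, qlen - copied) events), leaving dp_prod == 3 == dp_cons,
 * i.e. an empty ring.
 */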
853