xref: /openbsd-src/sys/dev/dt/dt_dev.c (revision fc405d53b73a2d73393cb97f684863d17b583e38)
1 /*	$OpenBSD: dt_dev.c,v 1.26 2023/04/26 16:53:59 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/systm.h>
21 #include <sys/param.h>
22 #include <sys/device.h>
23 #include <sys/exec_elf.h>
24 #include <sys/malloc.h>
25 #include <sys/proc.h>
26 #include <sys/ptrace.h>
27 
28 #include <dev/dt/dtvar.h>
29 
30 /*
31  * Number of frames to skip in stack traces.
32  *
33  * The number of frames required to execute dt(4) profiling code
34  * depends on the probe, context, architecture and possibly the
35  * compiler.
36  *
37  * Static probes (tracepoints) are executed in the context of the
38  * current thread and only need to skip frames up to the recording
39  * function.  For example the syscall provider:
40  *
41  *	dt_prov_syscall_entry+0x141
42  *	syscall+0x205		<--- start here
43  *	Xsyscall+0x128
44  *
45  * Probes executed in their own context, like the profile provider,
46  * need to skip the frames of that context which are different for
47  * every architecture.  For example the profile provider executed
48  * from hardclock(9) on amd64:
49  *
50  *	dt_prov_profile_enter+0x6e
51  *	hardclock+0x1a9
52  *	lapic_clockintr+0x3f
53  *	Xresume_lapic_ltimer+0x26
54  *	acpicpu_idle+0x1d2	<---- start here.
55  *	sched_idle+0x225
56  *	proc_trampoline+0x1c
57  */
/* Per-architecture frame counts; see the stack trace examples above. */
#if defined(__amd64__)
#define DT_FA_PROFILE	7
#define DT_FA_STATIC	2
#elif defined(__i386__)
#define DT_FA_PROFILE	8
#define DT_FA_STATIC	2
#elif defined(__macppc__)
#define DT_FA_PROFILE  7
#define DT_FA_STATIC   2
#elif defined(__octeon__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__powerpc64__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__sparc64__)
#define DT_FA_PROFILE	7
#define DT_FA_STATIC	1
#else
/* No count is defined for other architectures: both values are 0. */
#define DT_FA_STATIC	0
#define DT_FA_PROFILE	0
#endif

#define DT_EVTRING_SIZE	16	/* # of slots in per PCB event ring */

/* Debug output is compiled out: DPRINTF() expands to nothing. */
#define DPRINTF(x...) /* nothing */
84 
/*
 * Descriptor associated with each program opening /dev/dt.  It is used
 * to keep track of enabled PCBs.  One descriptor is allocated by
 * dtopen() and released by dtclose().
 *
 *  Locks used to protect struct members in this file:
 *	I	immutable after creation
 *	m	per-softc mutex
 *	K	kernel lock
 */
struct dt_softc {
	SLIST_ENTRY(dt_softc)	 ds_next;	/* [K] descriptor list */
	int			 ds_unit;	/* [I] D_CLONE unique unit */
	pid_t			 ds_pid;	/* [I] PID of tracing program */

	struct mutex		 ds_mtx;	/* mutex for [m] members */

	struct dt_pcb_list	 ds_pcbs;	/* [K] list of enabled PCBs */
	struct dt_evt		*ds_bufqueue;	/* [K] copy evts to userland */
	size_t			 ds_bufqlen;	/* [K] length of the queue */
	int			 ds_recording;	/* [K] currently recording? */
	int			 ds_evtcnt;	/* [m] # of readable evts */

	/* Counters */
	uint64_t		 ds_readevt;	/* [m] # of events read */
	uint64_t		 ds_dropevt;	/* [m] # of events dropped */
};
110 
SLIST_HEAD(, dt_softc) dtdev_list;	/* [K] list of open /dev/dt nodes */

/*
 * Probes are created during dt_attach() and never modified/freed during
 * the lifetime of the system.  That's why we consider them as [I]mmutable.
 */
unsigned int			dt_nprobes;	/* [I] # of probes available */
SIMPLEQ_HEAD(, dt_probe)	dt_probe_list;	/* [I] list of probes */

/* Write-locked while PCBs are linked to or unlinked from their probe. */
struct rwlock			dt_lock = RWLOCK_INITIALIZER("dtlk");
volatile uint32_t		dt_tracing = 0;	/* [K] # of processes tracing */

/* dtopen() fails with EPERM while this is zero. */
int allowdt;
124 
125 void	dtattach(struct device *, struct device *, void *);
126 int	dtopen(dev_t, int, int, struct proc *);
127 int	dtclose(dev_t, int, int, struct proc *);
128 int	dtread(dev_t, struct uio *, int);
129 int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);
130 
131 struct	dt_softc *dtlookup(int);
132 
133 int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
134 int	dt_ioctl_get_args(struct dt_softc *, struct dtioc_arg *);
135 int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
136 int	dt_ioctl_record_start(struct dt_softc *);
137 void	dt_ioctl_record_stop(struct dt_softc *);
138 int	dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
139 int	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
140 int	dt_ioctl_get_auxbase(struct dt_softc *, struct dtioc_getaux *);
141 
142 int	dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);
143 
144 void
145 dtattach(struct device *parent, struct device *self, void *aux)
146 {
147 	SLIST_INIT(&dtdev_list);
148 	SIMPLEQ_INIT(&dt_probe_list);
149 
150 	/* Init providers */
151 	dt_nprobes += dt_prov_profile_init();
152 	dt_nprobes += dt_prov_syscall_init();
153 	dt_nprobes += dt_prov_static_init();
154 #ifdef DDBPROF
155 	dt_nprobes += dt_prov_kprobe_init();
156 #endif
157 }
158 
159 int
160 dtopen(dev_t dev, int flags, int mode, struct proc *p)
161 {
162 	struct dt_softc *sc;
163 	int unit = minor(dev);
164 
165 	if (!allowdt)
166 		return EPERM;
167 
168 	KASSERT(dtlookup(unit) == NULL);
169 
170 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
171 	if (sc == NULL)
172 		return ENOMEM;
173 
174 	/*
175 	 * Enough space to empty 2 full rings of events in a single read.
176 	 */
177 	sc->ds_bufqlen = 2 * DT_EVTRING_SIZE;
178 	sc->ds_bufqueue = mallocarray(sc->ds_bufqlen, sizeof(*sc->ds_bufqueue),
179 	    M_DEVBUF, M_WAITOK|M_CANFAIL);
180 	if (sc->ds_bufqueue == NULL)
181 		goto bad;
182 
183 	sc->ds_unit = unit;
184 	sc->ds_pid = p->p_p->ps_pid;
185 	TAILQ_INIT(&sc->ds_pcbs);
186 	mtx_init(&sc->ds_mtx, IPL_HIGH);
187 	sc->ds_evtcnt = 0;
188 	sc->ds_readevt = 0;
189 	sc->ds_dropevt = 0;
190 
191 	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);
192 
193 	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);
194 
195 	return 0;
196 
197 bad:
198 	free(sc, M_DEVBUF, sizeof(*sc));
199 	return ENOMEM;
200 }
201 
202 int
203 dtclose(dev_t dev, int flags, int mode, struct proc *p)
204 {
205 	struct dt_softc *sc;
206 	int unit = minor(dev);
207 
208 	sc = dtlookup(unit);
209 	KASSERT(sc != NULL);
210 
211 	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);
212 
213 	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
214 	dt_ioctl_record_stop(sc);
215 	dt_pcb_purge(&sc->ds_pcbs);
216 
217 	free(sc->ds_bufqueue, M_DEVBUF,
218 	    sc->ds_bufqlen * sizeof(*sc->ds_bufqueue));
219 	free(sc, M_DEVBUF, sizeof(*sc));
220 
221 	return 0;
222 }
223 
224 int
225 dtread(dev_t dev, struct uio *uio, int flags)
226 {
227 	struct sleep_state sls;
228 	struct dt_softc *sc;
229 	struct dt_evt *estq;
230 	struct dt_pcb *dp;
231 	int error = 0, unit = minor(dev);
232 	size_t qlen, count, read = 0;
233 	uint64_t dropped = 0;
234 
235 	sc = dtlookup(unit);
236 	KASSERT(sc != NULL);
237 
238 	count = howmany(uio->uio_resid, sizeof(struct dt_evt));
239 	if (count < 1)
240 		return (EMSGSIZE);
241 
242 	while (!sc->ds_evtcnt) {
243 		sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread", 0);
244 		error = sleep_finish(&sls, !sc->ds_evtcnt);
245 		if (error == EINTR || error == ERESTART)
246 			break;
247 	}
248 	if (error)
249 		return error;
250 
251 	estq = sc->ds_bufqueue;
252 	qlen = MIN(sc->ds_bufqlen, count);
253 
254 	KERNEL_ASSERT_LOCKED();
255 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
256 		count = dt_pcb_ring_copy(dp, estq, qlen, &dropped);
257 		read += count;
258 		estq += count; /* pointer arithmetic */
259 		qlen -= count;
260 		if (qlen == 0)
261 			break;
262 	}
263 	if (read > 0)
264 		uiomove(sc->ds_bufqueue, read * sizeof(struct dt_evt), uio);
265 
266 	mtx_enter(&sc->ds_mtx);
267 	sc->ds_evtcnt -= read;
268 	sc->ds_readevt += read;
269 	sc->ds_dropevt += dropped;
270 	mtx_leave(&sc->ds_mtx);
271 
272 	return 0;
273 }
274 
275 int
276 dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
277 {
278 	struct dt_softc *sc;
279 	int unit = minor(dev);
280 	int on, error = 0;
281 
282 	sc = dtlookup(unit);
283 	KASSERT(sc != NULL);
284 
285 	switch (cmd) {
286 	case DTIOCGPLIST:
287 		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
288 	case DTIOCGARGS:
289 		return dt_ioctl_get_args(sc, (struct dtioc_arg *)addr);
290 	case DTIOCGSTATS:
291 		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
292 	case DTIOCRECORD:
293 	case DTIOCPRBENABLE:
294 	case DTIOCPRBDISABLE:
295 	case DTIOCGETAUXBASE:
296 		/* root only ioctl(2) */
297 		break;
298 	default:
299 		return ENOTTY;
300 	}
301 
302 	if ((error = suser(p)) != 0)
303 		return error;
304 
305 	switch (cmd) {
306 	case DTIOCRECORD:
307 		on = *(int *)addr;
308 		if (on)
309 			error = dt_ioctl_record_start(sc);
310 		else
311 			dt_ioctl_record_stop(sc);
312 		break;
313 	case DTIOCPRBENABLE:
314 		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
315 		break;
316 	case DTIOCPRBDISABLE:
317 		error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr);
318 		break;
319 	case DTIOCGETAUXBASE:
320 		error = dt_ioctl_get_auxbase(sc, (struct dtioc_getaux *)addr);
321 		break;
322 	default:
323 		KASSERT(0);
324 	}
325 
326 	return error;
327 }
328 
329 struct dt_softc *
330 dtlookup(int unit)
331 {
332 	struct dt_softc *sc;
333 
334 	KERNEL_ASSERT_LOCKED();
335 
336 	SLIST_FOREACH(sc, &dtdev_list, ds_next) {
337 		if (sc->ds_unit == unit)
338 			break;
339 	}
340 
341 	return sc;
342 }
343 
344 int
345 dtioc_req_isvalid(struct dtioc_req *dtrq)
346 {
347 	switch (dtrq->dtrq_filter.dtf_operand) {
348 	case DT_OP_NONE:
349 	case DT_OP_EQ:
350 	case DT_OP_NE:
351 		break;
352 	default:
353 		return 0;
354 	}
355 
356 	switch (dtrq->dtrq_filter.dtf_variable) {
357 	case DT_FV_NONE:
358 	case DT_FV_PID:
359 	case DT_FV_TID:
360 		break;
361 	default:
362 		return 0;
363 	}
364 
365 	return 1;
366 }
367 
368 int
369 dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
370 {
371 	struct dtioc_probe_info info, *dtpi;
372 	struct dt_probe *dtp;
373 	size_t size;
374 	int error = 0;
375 
376 	size = dtpr->dtpr_size;
377 	dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
378 	if (size == 0)
379 		return 0;
380 
381 	dtpi = dtpr->dtpr_probes;
382 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
383 		if (size < sizeof(*dtpi)) {
384 			error = ENOSPC;
385 			break;
386 		}
387 		memset(&info, 0, sizeof(info));
388 		info.dtpi_pbn = dtp->dtp_pbn;
389 		info.dtpi_nargs = dtp->dtp_nargs;
390 		strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
391 		    sizeof(info.dtpi_prov));
392 		strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
393 		strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
394 		error = copyout(&info, dtpi, sizeof(*dtpi));
395 		if (error)
396 			break;
397 		size -= sizeof(*dtpi);
398 		dtpi++;
399 	}
400 
401 	return error;
402 }
403 
404 int
405 dt_ioctl_get_args(struct dt_softc *sc, struct dtioc_arg *dtar)
406 {
407 	struct dtioc_arg_info info, *dtai;
408 	struct dt_probe *dtp;
409 	size_t size, n, t;
410 	uint32_t pbn;
411 	int error = 0;
412 
413 	pbn = dtar->dtar_pbn;
414 	if (pbn == 0 || pbn > dt_nprobes)
415 		return EINVAL;
416 
417 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
418 		if (pbn == dtp->dtp_pbn)
419 			break;
420 	}
421 	if (dtp == NULL)
422 		return EINVAL;
423 
424 	if (dtp->dtp_sysnum != 0) {
425 		/* currently not supported for system calls */
426 		dtar->dtar_size = 0;
427 		return 0;
428 	}
429 
430 	size = dtar->dtar_size;
431 	dtar->dtar_size = dtp->dtp_nargs * sizeof(*dtar);
432 	if (size == 0)
433 		return 0;
434 
435 	t = 0;
436 	dtai = dtar->dtar_args;
437 	for (n = 0; n < dtp->dtp_nargs; n++) {
438 		if (size < sizeof(*dtai)) {
439 			error = ENOSPC;
440 			break;
441 		}
442 		if (n >= DTMAXARGTYPES || dtp->dtp_argtype[n] == NULL)
443 			continue;
444 		memset(&info, 0, sizeof(info));
445 		info.dtai_pbn = dtp->dtp_pbn;
446 		info.dtai_argn = t++;
447 		strlcpy(info.dtai_argtype, dtp->dtp_argtype[n],
448 		    sizeof(info.dtai_argtype));
449 		error = copyout(&info, dtai, sizeof(*dtai));
450 		if (error)
451 			break;
452 		size -= sizeof(*dtai);
453 		dtai++;
454 	}
455 	dtar->dtar_size = t * sizeof(*dtar);
456 
457 	return error;
458 }
459 
460 int
461 dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
462 {
463 	mtx_enter(&sc->ds_mtx);
464 	dtst->dtst_readevt = sc->ds_readevt;
465 	dtst->dtst_dropevt = sc->ds_dropevt;
466 	mtx_leave(&sc->ds_mtx);
467 
468 	return 0;
469 }
470 
471 int
472 dt_ioctl_record_start(struct dt_softc *sc)
473 {
474 	struct dt_pcb *dp;
475 
476 	if (sc->ds_recording)
477 		return EBUSY;
478 
479 	KERNEL_ASSERT_LOCKED();
480 	if (TAILQ_EMPTY(&sc->ds_pcbs))
481 		return ENOENT;
482 
483 	rw_enter_write(&dt_lock);
484 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
485 		struct dt_probe *dtp = dp->dp_dtp;
486 
487 		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
488 		dtp->dtp_recording++;
489 		dtp->dtp_prov->dtpv_recording++;
490 	}
491 	rw_exit_write(&dt_lock);
492 
493 	sc->ds_recording = 1;
494 	dt_tracing++;
495 
496 	return 0;
497 }
498 
499 void
500 dt_ioctl_record_stop(struct dt_softc *sc)
501 {
502 	struct dt_pcb *dp;
503 
504 	if (!sc->ds_recording)
505 		return;
506 
507 	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);
508 
509 	dt_tracing--;
510 	sc->ds_recording = 0;
511 
512 	rw_enter_write(&dt_lock);
513 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
514 		struct dt_probe *dtp = dp->dp_dtp;
515 
516 		dtp->dtp_recording--;
517 		dtp->dtp_prov->dtpv_recording--;
518 		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
519 	}
520 	rw_exit_write(&dt_lock);
521 
522 	/* Wait until readers cannot access the PCBs. */
523 	smr_barrier();
524 }
525 
526 int
527 dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
528 {
529 	struct dt_pcb_list plist;
530 	struct dt_probe *dtp;
531 	int error;
532 
533 	if (!dtioc_req_isvalid(dtrq))
534 		return EINVAL;
535 
536 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
537 		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
538 			break;
539 	}
540 	if (dtp == NULL)
541 		return ENOENT;
542 
543 	TAILQ_INIT(&plist);
544 	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
545 	if (error)
546 		return error;
547 
548 	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
549 	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);
550 
551 	/* Append all PCBs to this instance */
552 	TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);
553 
554 	return 0;
555 }
556 
557 int
558 dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq)
559 {
560 	struct dt_probe *dtp;
561 	int error;
562 
563 	if (!dtioc_req_isvalid(dtrq))
564 		return EINVAL;
565 
566 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
567 		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
568 			break;
569 	}
570 	if (dtp == NULL)
571 		return ENOENT;
572 
573 	if (dtp->dtp_prov->dtpv_dealloc) {
574 		error = dtp->dtp_prov->dtpv_dealloc(dtp, sc, dtrq);
575 		if (error)
576 			return error;
577 	}
578 
579 	DPRINTF("dt%d: pid %d dealloc\n", sc->ds_unit, sc->ds_pid,
580 	    dtrq->dtrq_pbn);
581 
582 	return 0;
583 }
584 
585 int
586 dt_ioctl_get_auxbase(struct dt_softc *sc, struct dtioc_getaux *dtga)
587 {
588 	struct uio uio;
589 	struct iovec iov;
590 	struct process *pr;
591 	struct proc *p = curproc;
592 	AuxInfo auxv[ELF_AUX_ENTRIES];
593 	int i, error;
594 
595 	dtga->dtga_auxbase = 0;
596 
597 	if ((pr = prfind(dtga->dtga_pid)) == NULL)
598 		return ESRCH;
599 
600 	iov.iov_base = auxv;
601 	iov.iov_len = sizeof(auxv);
602 	uio.uio_iov = &iov;
603 	uio.uio_iovcnt = 1;
604 	uio.uio_offset = pr->ps_auxinfo;
605 	uio.uio_resid = sizeof(auxv);
606 	uio.uio_segflg = UIO_SYSSPACE;
607 	uio.uio_procp = p;
608 	uio.uio_rw = UIO_READ;
609 
610 	error = process_domem(p, pr, &uio, PT_READ_D);
611 	if (error)
612 		return error;
613 
614 	for (i = 0; i < ELF_AUX_ENTRIES; i++)
615 		if (auxv[i].au_id == AUX_base)
616 			dtga->dtga_auxbase = auxv[i].au_v;
617 
618 	return 0;
619 }
620 
621 struct dt_probe *
622 dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
623 {
624 	struct dt_probe *dtp;
625 
626 	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
627 	if (dtp == NULL)
628 		return NULL;
629 
630 	SMR_SLIST_INIT(&dtp->dtp_pcbs);
631 	dtp->dtp_prov = dtpv;
632 	dtp->dtp_func = func;
633 	dtp->dtp_name = name;
634 	dtp->dtp_sysnum = -1;
635 	dtp->dtp_ref = 0;
636 
637 	return dtp;
638 }
639 
640 void
641 dt_dev_register_probe(struct dt_probe *dtp)
642 {
643 	static uint64_t probe_nb;
644 
645 	dtp->dtp_pbn = ++probe_nb;
646 	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
647 }
648 
649 struct dt_pcb *
650 dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
651 {
652 	struct dt_pcb *dp;
653 
654 	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
655 	if (dp == NULL)
656 		goto bad;
657 
658 	dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT,
659 	    M_WAITOK|M_CANFAIL|M_ZERO);
660 	if (dp->dp_ring == NULL)
661 		goto bad;
662 
663 	mtx_init(&dp->dp_mtx, IPL_HIGH);
664 	dp->dp_sc = sc;
665 	dp->dp_dtp = dtp;
666 	return dp;
667 bad:
668 	dt_pcb_free(dp);
669 	return NULL;
670 }
671 
672 void
673 dt_pcb_free(struct dt_pcb *dp)
674 {
675 	if (dp == NULL)
676 		return;
677 	free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
678 	free(dp, M_DT, sizeof(*dp));
679 }
680 
681 void
682 dt_pcb_purge(struct dt_pcb_list *plist)
683 {
684 	struct dt_pcb *dp;
685 
686 	while ((dp = TAILQ_FIRST(plist)) != NULL) {
687 		TAILQ_REMOVE(plist, dp, dp_snext);
688 		dt_pcb_free(dp);
689 	}
690 }
691 
692 int
693 dt_pcb_filter(struct dt_pcb *dp)
694 {
695 	struct dt_filter *dtf = &dp->dp_filter;
696 	struct proc *p = curproc;
697 	unsigned int var = 0;
698 	int match = 1;
699 
700 	/* Filter out tracing program. */
701 	if (dp->dp_sc->ds_pid == p->p_p->ps_pid)
702 		return 1;
703 
704 	switch (dtf->dtf_variable) {
705 	case DT_FV_PID:
706 		var = p->p_p->ps_pid;
707 		break;
708 	case DT_FV_TID:
709 		var = p->p_tid + THREAD_PID_OFFSET;
710 		break;
711 	case DT_FV_NONE:
712 		break;
713 	default:
714 		KASSERT(0);
715 	}
716 
717 	switch (dtf->dtf_operand) {
718 	case DT_OP_EQ:
719 		match = !!(var == dtf->dtf_value);
720 		break;
721 	case DT_OP_NE:
722 		match = !!(var != dtf->dtf_value);
723 		break;
724 	case DT_OP_NONE:
725 		break;
726 	default:
727 		KASSERT(0);
728 	}
729 
730 	return !match;
731 }
732 
733 
734 /*
735  * Get a reference to the next free event state from the ring.
736  */
737 struct dt_evt *
738 dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
739 {
740 	struct proc *p = curproc;
741 	struct dt_evt *dtev;
742 	int distance;
743 
744 	if (dt_pcb_filter(dp))
745 		return NULL;
746 
747 	mtx_enter(&dp->dp_mtx);
748 	distance = dp->dp_prod - dp->dp_cons;
749 	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
750 		/* read(2) isn't finished */
751 		dp->dp_dropevt++;
752 		mtx_leave(&dp->dp_mtx);
753 		return NULL;
754 	}
755 
756 	/*
757 	 * Save states in next free event slot.
758 	 */
759 	dtev = &dp->dp_ring[dp->dp_cons];
760 	memset(dtev, 0, sizeof(*dtev));
761 
762 	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
763 	dtev->dtev_cpu = cpu_number();
764 	dtev->dtev_pid = p->p_p->ps_pid;
765 	dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET;
766 	nanotime(&dtev->dtev_tsp);
767 
768 	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
769 		strlcpy(dtev->dtev_comm, p->p_p->ps_comm, sizeof(dtev->dtev_comm));
770 
771 	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK)) {
772 		if (profiling)
773 			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
774 		else
775 			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
776 	}
777 	if (ISSET(dp->dp_evtflags, DTEVT_USTACK))
778 		stacktrace_save_utrace(&dtev->dtev_ustack);
779 
780 	return dtev;
781 }
782 
783 void
784 dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
785 {
786 	MUTEX_ASSERT_LOCKED(&dp->dp_mtx);
787 	KASSERT(dtev == &dp->dp_ring[dp->dp_cons]);
788 
789 	dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE;
790 	mtx_leave(&dp->dp_mtx);
791 
792 	mtx_enter(&dp->dp_sc->ds_mtx);
793 	dp->dp_sc->ds_evtcnt++;
794 	mtx_leave(&dp->dp_sc->ds_mtx);
795 	wakeup(dp->dp_sc);
796 }
797 
/*
 * Copy at most `qlen' events from `dp', producing the same amount
 * of free slots.
 *
 * Events are copied to `estq', starting at its first element.  The
 * drop counter of the PCB is added to `*dropped' and reset, unless
 * the ring holds nothing to copy.  The whole operation runs under the
 * PCB mutex.
 *
 * Returns the number of events copied.
 */
int
dt_pcb_ring_copy(struct dt_pcb *dp, struct dt_evt *estq, size_t qlen,
    uint64_t *dropped)
{
	size_t count, copied = 0;
	unsigned int cons, prod;

	KASSERT(qlen > 0);

	mtx_enter(&dp->dp_mtx);
	cons = dp->dp_cons;
	prod = dp->dp_prod;

	/*
	 * Size of the first segment: up to the end of the ring when
	 * `cons' is below `prod', up to `cons' otherwise.
	 */
	if (cons < prod)
		count = DT_EVTRING_SIZE - prod;
	else
		count = cons - prod;

	/* Nothing to copy; the drop counter is left untouched. */
	if (count == 0)
		goto out;

	*dropped += dp->dp_dropevt;
	dp->dp_dropevt = 0;

	count = MIN(count, qlen);

	memcpy(&estq[0], &dp->dp_ring[prod], count * sizeof(*estq));
	copied += count;

	/* Produce */
	prod = (prod + count) % DT_EVTRING_SIZE;

	/* If the queue is full or the ring didn't wrap, stop here. */
	if (qlen == copied || prod != 0 || cons == 0)
		goto out;

	/* Second segment: from the start of the ring up to `cons'. */
	count = MIN(cons, (qlen - copied));
	memcpy(&estq[copied], &dp->dp_ring[0], count * sizeof(*estq));
	copied += count;
	prod += count;

out:
	/* Publish the new index before releasing the mutex. */
	dp->dp_prod = prod;
	mtx_leave(&dp->dp_mtx);
	return copied;
}
848