xref: /openbsd-src/sys/dev/dt/dt_dev.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: dt_dev.c,v 1.10 2020/09/28 13:16:58 kettenis Exp $ */
2 
3 /*
4  * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/systm.h>
21 #include <sys/param.h>
22 #include <sys/device.h>
23 #include <sys/malloc.h>
24 #include <sys/proc.h>
25 
26 #include <dev/dt/dtvar.h>
27 
28 /*
29  * Number of frames to skip in stack traces.
30  *
31  * The number of frames required to execute dt(4) profiling code
32  * depends on the probe, context, architecture and possibly the
33  * compiler.
34  *
35  * Static probes (tracepoints) are executed in the context of the
36  * current thread and only need to skip frames up to the recording
37  * function.  For example the syscall provider:
38  *
39  *	dt_prov_syscall_entry+0x141
40  *	syscall+0x205		<--- start here
41  *	Xsyscall+0x128
42  *
43  * Probes executed in their own context, like the profile provider,
44  * need to skip the frames of that context which are different for
45  * every architecture.  For example the profile provider executed
46  * from hardclock(9) on amd64:
47  *
48  *	dt_prov_profile_enter+0x6e
49  *	hardclock+0x1a9
50  *	lapic_clockintr+0x3f
51  *	Xresume_lapic_ltimer+0x26
52  *	acpicpu_idle+0x1d2	<---- start here.
53  *	sched_idle+0x225
54  *	proc_trampoline+0x1c
55  */
/*
 * Per-architecture frame counts handed to stacktrace_save_at():
 * DT_FA_STATIC for static probes, DT_FA_PROFILE for the profile
 * provider.  Architectures not listed here skip no frame at all.
 */
#if defined(__amd64__)
#define DT_FA_STATIC	2
#define DT_FA_PROFILE	5
#elif defined(__powerpc64__)
#define DT_FA_STATIC	2
#define DT_FA_PROFILE	6
#elif defined(__sparc64__)
#define DT_FA_STATIC	1
#define DT_FA_PROFILE	5
#else
#define DT_FA_STATIC	0
#define DT_FA_PROFILE	0
#endif

/* Number of event slots in each per-PCB ring. */
#define DT_EVTRING_SIZE	16

/* Debug output is compiled out: DPRINTF() expands to nothing. */
#define DPRINTF(x...) /* nothing */
73 
74 /*
75  * Descriptor associated with each program opening /dev/dt.  It is used
76  * to keep track of enabled PCBs.
77  *
78  *  Locks used to protect struct members in this file:
79  *	m	per-softc mutex
80  *	K	kernel lock
81  */
82 struct dt_softc {
83 	SLIST_ENTRY(dt_softc)	 ds_next;	/* [K] descriptor list */
84 	int			 ds_unit;	/* [I] D_CLONE unique unit */
85 	pid_t			 ds_pid;	/* [I] PID of tracing program */
86 
87 	struct mutex		 ds_mtx;
88 
89 	struct dt_pcb_list	 ds_pcbs;	/* [K] list of enabled PCBs */
90 	struct dt_evt		*ds_bufqueue;	/* [K] copy evts to userland */
91 	size_t			 ds_bufqlen;	/* [K] length of the queue */
92 	int			 ds_recording;	/* [K] currently recording? */
93 	int			 ds_evtcnt;	/* [m] # of readable evts */
94 
95 	/* Counters */
96 	uint64_t		 ds_readevt;	/* [m] # of events read */
97 	uint64_t		 ds_dropevt;	/* [m] # of events dropped */
98 };
99 
100 SLIST_HEAD(, dt_softc) dtdev_list;	/* [K] list of open /dev/dt nodes */
101 
102 /*
103  * Probes are created during dt_attach() and never modified/freed during
104  * the lifetime of the system.  That's why we consider them as [I]mmutable.
105  */
106 unsigned int			dt_nprobes;	/* [I] # of probes available */
107 SIMPLEQ_HEAD(, dt_probe)	dt_probe_list;	/* [I] list of probes */
108 
109 struct rwlock			dt_lock = RWLOCK_INITIALIZER("dtlk");
110 volatile uint32_t		dt_tracing = 0;	/* [K] # of processes tracing */
111 
112 void	dtattach(struct device *, struct device *, void *);
113 int	dtopen(dev_t, int, int, struct proc *);
114 int	dtclose(dev_t, int, int, struct proc *);
115 int	dtread(dev_t, struct uio *, int);
116 int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);
117 
118 struct	dt_softc *dtlookup(int);
119 
120 int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
121 int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
122 int	dt_ioctl_record_start(struct dt_softc *);
123 void	dt_ioctl_record_stop(struct dt_softc *);
124 int	dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
125 void	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
126 
127 int	dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);
128 
129 void
130 dtattach(struct device *parent, struct device *self, void *aux)
131 {
132 	SLIST_INIT(&dtdev_list);
133 	SIMPLEQ_INIT(&dt_probe_list);
134 
135 	/* Init providers */
136 	dt_nprobes += dt_prov_profile_init();
137 	dt_nprobes += dt_prov_syscall_init();
138 	dt_nprobes += dt_prov_static_init();
139 
140 	printf("dt: %u probes\n", dt_nprobes);
141 }
142 
143 int
144 dtopen(dev_t dev, int flags, int mode, struct proc *p)
145 {
146 	struct dt_softc *sc;
147 	int unit = minor(dev);
148 	extern int allowdt;
149 
150 	if (!allowdt)
151 		return EPERM;
152 
153 	KASSERT(dtlookup(unit) == NULL);
154 
155 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
156 	if (sc == NULL)
157 		return ENOMEM;
158 
159 	/*
160 	 * Enough space to empty 2 full rings of events in a single read.
161 	 */
162 	sc->ds_bufqlen = 2 * DT_EVTRING_SIZE;
163 	sc->ds_bufqueue = mallocarray(sc->ds_bufqlen, sizeof(*sc->ds_bufqueue),
164 	    M_DEVBUF, M_WAITOK|M_CANFAIL);
165 	if (sc->ds_bufqueue == NULL)
166 		goto bad;
167 
168 	sc->ds_unit = unit;
169 	sc->ds_pid = p->p_p->ps_pid;
170 	TAILQ_INIT(&sc->ds_pcbs);
171 	mtx_init(&sc->ds_mtx, IPL_HIGH);
172 	sc->ds_evtcnt = 0;
173 	sc->ds_readevt = 0;
174 	sc->ds_dropevt = 0;
175 
176 	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);
177 
178 	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);
179 
180 	return 0;
181 
182 bad:
183 	free(sc, M_DEVBUF, sizeof(*sc));
184 	return ENOMEM;
185 }
186 
187 int
188 dtclose(dev_t dev, int flags, int mode, struct proc *p)
189 {
190 	struct dt_softc *sc;
191 	int unit = minor(dev);
192 
193 	sc = dtlookup(unit);
194 	KASSERT(sc != NULL);
195 
196 	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);
197 
198 	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
199 	dt_ioctl_record_stop(sc);
200 	dt_pcb_purge(&sc->ds_pcbs);
201 
202 	free(sc->ds_bufqueue, M_DEVBUF,
203 	    sc->ds_bufqlen * sizeof(*sc->ds_bufqueue));
204 	free(sc, M_DEVBUF, sizeof(*sc));
205 
206 	return 0;
207 }
208 
209 int
210 dtread(dev_t dev, struct uio *uio, int flags)
211 {
212 	struct sleep_state sls;
213 	struct dt_softc *sc;
214 	struct dt_evt *estq;
215 	struct dt_pcb *dp;
216 	int error, unit = minor(dev);
217 	size_t qlen, count, read = 0;
218 	uint64_t dropped = 0;
219 
220 	sc = dtlookup(unit);
221 	KASSERT(sc != NULL);
222 
223 	count = howmany(uio->uio_resid, sizeof(struct dt_evt));
224 	if (count < 1)
225 		return (EMSGSIZE);
226 
227 	while (!sc->ds_evtcnt) {
228 		sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread");
229 		sleep_setup_signal(&sls);
230 		sleep_finish(&sls, !sc->ds_evtcnt);
231 		error = sleep_finish_signal(&sls);
232 		if (error == EINTR || error == ERESTART)
233 			break;
234 	}
235 	if (error)
236 		return error;
237 
238 	estq = sc->ds_bufqueue;
239 	qlen = MIN(sc->ds_bufqlen, count);
240 
241 	KERNEL_ASSERT_LOCKED();
242 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
243 		count = dt_pcb_ring_copy(dp, estq, qlen, &dropped);
244 		read += count;
245 		estq += count; /* pointer aritmetic */
246 		qlen -= count;
247 		if (qlen == 0)
248 			break;
249 	}
250 	if (read > 0)
251 		uiomove(sc->ds_bufqueue, read * sizeof(struct dt_evt), uio);
252 
253 	mtx_enter(&sc->ds_mtx);
254 	sc->ds_evtcnt -= read;
255 	sc->ds_readevt += read;
256 	sc->ds_dropevt += dropped;
257 	mtx_leave(&sc->ds_mtx);
258 
259 	return 0;
260 }
261 
262 int
263 dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
264 {
265 	struct dt_softc *sc;
266 	int unit = minor(dev);
267 	int on, error = 0;
268 
269 	sc = dtlookup(unit);
270 	KASSERT(sc != NULL);
271 
272 	switch (cmd) {
273 	case DTIOCGPLIST:
274 		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
275 	case DTIOCGSTATS:
276 		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
277 	case DTIOCRECORD:
278 	case DTIOCPRBENABLE:
279 		/* root only ioctl(2) */
280 		break;
281 	default:
282 		return ENOTTY;
283 	}
284 
285 	if ((error = suser(p)) != 0)
286 		return error;
287 
288 	switch (cmd) {
289 	case DTIOCRECORD:
290 		on = *(int *)addr;
291 		if (on)
292 			error = dt_ioctl_record_start(sc);
293 		else
294 			dt_ioctl_record_stop(sc);
295 		break;
296 	case DTIOCPRBENABLE:
297 		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
298 		break;
299 	default:
300 		KASSERT(0);
301 	}
302 
303 	return error;
304 }
305 
306 struct dt_softc *
307 dtlookup(int unit)
308 {
309 	struct dt_softc *sc;
310 
311 	KERNEL_ASSERT_LOCKED();
312 
313 	SLIST_FOREACH(sc, &dtdev_list, ds_next) {
314 		if (sc->ds_unit == unit)
315 			break;
316 	}
317 
318 	return sc;
319 }
320 
321 int
322 dtioc_req_isvalid(struct dtioc_req *dtrq)
323 {
324 	switch (dtrq->dtrq_filter.dtf_operand) {
325 	case DT_OP_NONE:
326 	case DT_OP_EQ:
327 	case DT_OP_NE:
328 		break;
329 	default:
330 		return 0;
331 	}
332 
333 	switch (dtrq->dtrq_filter.dtf_variable) {
334 	case DT_FV_NONE:
335 	case DT_FV_PID:
336 	case DT_FV_TID:
337 		break;
338 	default:
339 		return 0;
340 	}
341 
342 	return 1;
343 }
344 
345 int
346 dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
347 {
348 	struct dtioc_probe_info info, *dtpi;
349 	struct dt_probe *dtp;
350 	size_t size;
351 	int error = 0;
352 
353 	size = dtpr->dtpr_size;
354 	dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
355 	if (size == 0)
356 		return 0;
357 
358 	dtpi = dtpr->dtpr_probes;
359 	memset(&info, 0, sizeof(info));
360 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
361 		if (size < sizeof(*dtpi)) {
362 			error = ENOSPC;
363 			break;
364 		}
365 		info.dtpi_pbn = dtp->dtp_pbn;
366 		info.dtpi_nargs = dtp->dtp_nargs;
367 		strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
368 		    sizeof(info.dtpi_prov));
369 		strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
370 		strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
371 		error = copyout(&info, dtpi, sizeof(*dtpi));
372 		if (error)
373 			break;
374 		size -= sizeof(*dtpi);
375 		dtpi++;
376 	};
377 
378 	return error;
379 }
380 
381 int
382 dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
383 {
384 	mtx_enter(&sc->ds_mtx);
385 	dtst->dtst_readevt = sc->ds_readevt;
386 	dtst->dtst_dropevt = sc->ds_dropevt;
387 	mtx_leave(&sc->ds_mtx);
388 
389 	return 0;
390 }
391 
392 int
393 dt_ioctl_record_start(struct dt_softc *sc)
394 {
395 	struct dt_pcb *dp;
396 
397 	if (sc->ds_recording)
398 		return EBUSY;
399 
400 	KERNEL_ASSERT_LOCKED();
401  	if (TAILQ_EMPTY(&sc->ds_pcbs))
402 		return ENOENT;
403 
404 	rw_enter_write(&dt_lock);
405 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
406 		struct dt_probe *dtp = dp->dp_dtp;
407 
408 		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
409 		dtp->dtp_recording++;
410 		dtp->dtp_prov->dtpv_recording++;
411 	}
412 	rw_exit_write(&dt_lock);
413 
414 	sc->ds_recording = 1;
415 	dt_tracing++;
416 
417 	return 0;
418 }
419 
420 void
421 dt_ioctl_record_stop(struct dt_softc *sc)
422 {
423 	struct dt_pcb *dp;
424 
425 	KASSERT(suser(curproc) == 0);
426 
427 	if (!sc->ds_recording)
428 		return;
429 
430 	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);
431 
432 	dt_tracing--;
433 	sc->ds_recording = 0;
434 
435 	rw_enter_write(&dt_lock);
436 	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
437 		struct dt_probe *dtp = dp->dp_dtp;
438 
439 		dtp->dtp_recording--;
440 		dtp->dtp_prov->dtpv_recording--;
441 		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
442 	}
443 	rw_exit_write(&dt_lock);
444 
445 	/* Wait until readers cannot access the PCBs. */
446 	smr_barrier();
447 }
448 
449 int
450 dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
451 {
452 	struct dt_pcb_list plist;
453 	struct dt_probe *dtp;
454 	int error;
455 
456 	KASSERT(suser(curproc) == 0);
457 
458 	if (!dtioc_req_isvalid(dtrq))
459 		return EINVAL;
460 
461 	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
462 		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
463 			break;
464 	}
465 	if (dtp == NULL)
466 		return ENOENT;
467 
468 	TAILQ_INIT(&plist);
469 	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
470 	if (error)
471 		return error;
472 
473 	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
474 	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);
475 
476 	/* Append all PCBs to this instance */
477 	TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);
478 
479 	return 0;
480 }
481 
482 struct dt_probe *
483 dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
484 {
485 	struct dt_probe *dtp;
486 
487 	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
488 	if (dtp == NULL)
489 		return NULL;
490 
491 	SMR_SLIST_INIT(&dtp->dtp_pcbs);
492 	dtp->dtp_prov = dtpv;
493 	dtp->dtp_func = func;
494 	dtp->dtp_name = name;
495 	dtp->dtp_sysnum = -1;
496 
497 	return dtp;
498 }
499 
500 void
501 dt_dev_register_probe(struct dt_probe *dtp)
502 {
503 	static uint64_t probe_nb;
504 
505 	dtp->dtp_pbn = ++probe_nb;
506 	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
507 }
508 
509 struct dt_pcb *
510 dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
511 {
512 	struct dt_pcb *dp;
513 
514 	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
515 	if (dp == NULL)
516 		goto bad;
517 
518 	dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT,
519 	    M_WAITOK|M_CANFAIL|M_ZERO);
520 	if (dp->dp_ring == NULL)
521 		goto bad;
522 
523 	mtx_init(&dp->dp_mtx, IPL_HIGH);
524 	dp->dp_sc = sc;
525 	dp->dp_dtp = dtp;
526 	return dp;
527 bad:
528 	dt_pcb_free(dp);
529 	return NULL;
530 }
531 
532 void
533 dt_pcb_free(struct dt_pcb *dp)
534 {
535 	if (dp == NULL)
536 		return;
537 	free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
538 	free(dp, M_DT, sizeof(*dp));
539 }
540 
541 void
542 dt_pcb_purge(struct dt_pcb_list *plist)
543 {
544 	struct dt_pcb *dp;
545 
546 	while ((dp = TAILQ_FIRST(plist)) != NULL) {
547 		TAILQ_REMOVE(plist, dp, dp_snext);
548 		dt_pcb_free(dp);
549 	}
550 }
551 
552 int
553 dt_pcb_filter(struct dt_pcb *dp)
554 {
555 	struct dt_filter *dtf = &dp->dp_filter;
556 	struct proc *p = curproc;
557 	unsigned int var;
558 	int match = 1;
559 
560 	/* Filter out tracing program. */
561 	if (dp->dp_sc->ds_pid == p->p_p->ps_pid)
562 		return 1;
563 
564 	switch (dtf->dtf_variable) {
565 	case DT_FV_PID:
566 		var = p->p_p->ps_pid;
567 		break;
568 	case DT_FV_TID:
569 		var = p->p_tid;
570 		break;
571 	case DT_FV_NONE:
572 		break;
573 	default:
574 		KASSERT(0);
575 	}
576 
577 	switch (dtf->dtf_operand) {
578 	case DT_OP_EQ:
579 		match = !!(var == dtf->dtf_value);
580 		break;
581 	case DT_OP_NE:
582 		match = !!(var != dtf->dtf_value);
583 		break;
584 	case DT_OP_NONE:
585 		break;
586 	default:
587 		KASSERT(0);
588 	}
589 
590 	return !match;
591 }
592 
593 
594 /*
595  * Get a reference to the next free event state from the ring.
596  */
597 struct dt_evt *
598 dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
599 {
600 	struct proc *p = curproc;
601 	struct dt_evt *dtev;
602 	int distance;
603 
604 	if (dt_pcb_filter(dp))
605 		return NULL;
606 
607 	mtx_enter(&dp->dp_mtx);
608 	distance = dp->dp_prod - dp->dp_cons;
609 	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
610 		/* read(2) isn't finished */
611 		dp->dp_dropevt++;
612 		mtx_leave(&dp->dp_mtx);
613 		return NULL;
614 	}
615 
616 	/*
617 	 * Save states in next free event slot.
618 	 */
619 	dtev = &dp->dp_ring[dp->dp_cons];
620 	memset(dtev, 0, sizeof(*dtev));
621 
622 	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
623 	dtev->dtev_cpu = cpu_number();
624 	dtev->dtev_pid = p->p_p->ps_pid;
625 	dtev->dtev_tid = p->p_tid;
626 	nanotime(&dtev->dtev_tsp);
627 
628 	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
629 		memcpy(dtev->dtev_comm, p->p_p->ps_comm, DTMAXCOMLEN - 1);
630 
631 	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK|DTEVT_USTACK)) {
632 		if (profiling)
633 			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
634 		else
635 			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
636 	}
637 
638 	return dtev;
639 }
640 
641 void
642 dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
643 {
644 	MUTEX_ASSERT_LOCKED(&dp->dp_mtx);
645 	KASSERT(dtev == &dp->dp_ring[dp->dp_cons]);
646 
647 	dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE;
648 	mtx_leave(&dp->dp_mtx);
649 
650 	mtx_enter(&dp->dp_sc->ds_mtx);
651 	dp->dp_sc->ds_evtcnt++;
652 	mtx_leave(&dp->dp_sc->ds_mtx);
653 	wakeup(dp->dp_sc);
654 }
655 
656 /*
657  * Copy at most `qlen' events from `dp', producing the same amount
658  * of free slots.
659  */
660 int
661 dt_pcb_ring_copy(struct dt_pcb *dp, struct dt_evt *estq, size_t qlen,
662     uint64_t *dropped)
663 {
664 	size_t count, copied = 0;
665 	unsigned int cons, prod;
666 
667 	KASSERT(qlen > 0);
668 
669 	mtx_enter(&dp->dp_mtx);
670 	cons = dp->dp_cons;
671 	prod = dp->dp_prod;
672 
673 	if (cons < prod)
674 		count = DT_EVTRING_SIZE - prod;
675 	else
676 		count = cons - prod;
677 
678 	if (count == 0)
679 		goto out;
680 
681 	*dropped += dp->dp_dropevt;
682 	dp->dp_dropevt = 0;
683 
684 	count = MIN(count, qlen);
685 
686 	memcpy(&estq[0], &dp->dp_ring[prod], count * sizeof(*estq));
687 	copied += count;
688 
689 	/* Produce */
690 	prod = (prod + count) % DT_EVTRING_SIZE;
691 
692 	/* If the queue is full or the ring didn't wrap, stop here. */
693 	if (qlen == copied || prod != 0 || cons == 0)
694 		goto out;
695 
696 	count = MIN(cons, (qlen - copied));
697 	memcpy(&estq[copied], &dp->dp_ring[0], count * sizeof(*estq));
698 	copied += count;
699 	prod += count;
700 
701 out:
702 	dp->dp_prod = prod;
703 	mtx_leave(&dp->dp_mtx);
704 	return copied;
705 }
706