/*	$OpenBSD: ifq.c,v 1.49 2023/01/09 03:39:14 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "kstat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if NKSTAT > 0
#include <sys/kstat.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */
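/*
 * Tasks (start, restart, barrier) are appended to ifq_task_list under
 * ifq_task_mtx.  The first context to find ifq_serializer clear claims
 * it and runs every pending task to completion before releasing it, so
 * work queued against an ifq never runs concurrently with itself and
 * never has to sleep waiting for another CPU.
 */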

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

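/*
 * Transmit mitigation: while the queue holds fewer than
 * min(if_txmit, ifq_maxlen) packets, the call into the driver's start
 * routine is deferred to the softnet bundle task so several packets
 * can be pushed at the hardware in one go.  Once the threshold is
 * reached the pending bundle is cancelled and the start routine is
 * run immediately via the serialiser.
 */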
void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

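/*
 * ifq_barrier() guarantees that work queued on the serialiser before
 * the call has finished running: it cancels any pending bundle task,
 * then queues a task that signals a condition and waits for it.  If
 * nothing is currently being serialised there is nothing to wait for.
 */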
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	task_del(ifq->ifq_softnet, &ifq->ifq_bundle);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

#if NKSTAT > 0
struct ifq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;
	struct kstat_kv kd_maxqlen;
	struct kstat_kv kd_oactive;
};

static const struct ifq_kstat_data ifq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("maxqlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_INITIALIZER("oactive", KSTAT_KV_T_BOOL),
};

int
ifq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifqueue *ifq = ks->ks_softc;
	struct ifq_kstat_data *kd = dst;

	*kd = ifq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifq->ifq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifq->ifq_bytes;
	kstat_kv_u64(&kd->kd_qdrops) = ifq->ifq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifq->ifq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ifq->ifq_len;
	kstat_kv_u32(&kd->kd_maxqlen) = ifq->ifq_maxlen;
	kstat_kv_bool(&kd->kd_oactive) = ifq->ifq_oactive;

	return (0);
}
#endif

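/*
 * ifq_init() binds a transmit queue to its interface and softnet
 * taskq, defaults the queueing discipline to priq, and, when kstats
 * are compiled in, exports the per-queue counters as a "txq" kstat
 * keyed on the queue index.
 */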
void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(ifp->if_index + idx);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifq->ifq_kstat = kstat_create(ifp->if_xname, 0,
	    "txq", ifq->ifq_idx, KSTAT_T_KV, 0);
	KASSERT(ifq->ifq_kstat != NULL);
	kstat_set_mutex(ifq->ifq_kstat, &ifq->ifq_mtx);
	ifq->ifq_kstat->ks_softc = ifq;
	ifq->ifq_kstat->ks_datalen = sizeof(ifq_kstat_tpl);
	ifq->ifq_kstat->ks_copy = ifq_kstat_copy;
	kstat_install(ifq->ifq_kstat);
#endif
}

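/*
 * ifq_attach() switches an ifq to a new queueing discipline: the old
 * discipline is purged into a local list under ifq_mtx, the new ops
 * and queue state are installed, and the purged packets are offered
 * to the new discipline.  Packets it refuses are counted as qdrops
 * and freed, and the old discipline state is released, only after the
 * mutex has been dropped.
 */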
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

#if NKSTAT > 0
	kstat_destroy(ifq->ifq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
		taskq_barrier(ifq->ifq_softnet);

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

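/*
 * The enqueue op returns NULL when the packet was accepted, a
 * different mbuf when one had to be dropped to make room, or the
 * packet itself when it could not be queued at all; ifq_enqueue()
 * frees whatever comes back and turns the last case into ENOBUFS.
 */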
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

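/*
 * Dequeueing is split into begin/commit/rollback so a driver can look
 * at the head packet before deciding to take it; the discipline's
 * cookie rides in ph_cookie between begin and commit.  A start
 * routine would typically do something like the following, where
 * enough_descriptors() and encap_and_transmit() stand in for
 * driver-specific code:
 *
 *	while ((m = ifq_deq_begin(ifq)) != NULL) {
 *		if (!enough_descriptors(sc)) {
 *			ifq_deq_rollback(ifq, m);
 *			ifq_set_oactive(ifq);
 *			break;
 *		}
 *		ifq_deq_commit(ifq, m);
 *		encap_and_transmit(sc, m);
 *	}
 *
 * ifq_deq_leave() also flushes mbufs deferred via ifq_mfreem() and
 * ifq_mfreeml() so they are freed outside ifq_mtx.
 */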
static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

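/*
 * ifq_deq_sleep() dequeues a packet, sleeping on the ifq until one
 * becomes available.  With nbio set an empty queue returns
 * EWOULDBLOCK instead of sleeping, and EIO is returned if *alive goes
 * to zero while waiting; *sleeping is bumped around the sleep so
 * other code can tell whether anyone is waiting on the ifq.
 */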
int
ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
    const char *wmesg, volatile unsigned int *sleeping,
    volatile unsigned int *alive)
{
	struct mbuf *m;
	void *cookie;
	int error = 0;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 && nbio)
		error = EWOULDBLOCK;
	else {
		for (;;) {
			m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
			if (m != NULL) {
				ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
				ifq->ifq_len--;
				*mp = m;
				break;
			}

			(*sleeping)++;
			error = msleep_nsec(ifq, &ifq->ifq_mtx,
			    priority, wmesg, INFSLP);
			(*sleeping)--;
			if (error != 0)
				break;
			if (!(*alive)) {
				error = EIO;
				break;
			}
		}
	}
	ifq_deq_leave(ifq);

	return (error);
}

int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	if (ifq_empty(ifq))
		return (0);

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

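/*
 * ifq_mfreem() and ifq_mfreeml() drop packets while ifq_mtx is held:
 * the mbufs are accounted as qdrops and parked on ifq_free, and are
 * actually freed by ifq_deq_leave() once the mutex has been released.
 */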
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

#if NKSTAT > 0
struct ifiq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_fdrops;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;

	struct kstat_kv kd_enqueues;
	struct kstat_kv kd_dequeues;
};

static const struct ifiq_kstat_data ifiq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("fdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),

	KSTAT_KV_INITIALIZER("enqueues",
	    KSTAT_KV_T_COUNTER64),
	KSTAT_KV_INITIALIZER("dequeues",
	    KSTAT_KV_T_COUNTER64),
};

int
ifiq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifiqueue *ifiq = ks->ks_softc;
	struct ifiq_kstat_data *kd = dst;

	*kd = ifiq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifiq->ifiq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifiq->ifiq_bytes;
	kstat_kv_u64(&kd->kd_fdrops) = ifiq->ifiq_fdrops;
	kstat_kv_u64(&kd->kd_qdrops) = ifiq->ifiq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifiq->ifiq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ml_len(&ifiq->ifiq_ml);

	kstat_kv_u64(&kd->kd_enqueues) = ifiq->ifiq_enqueues;
	kstat_kv_u64(&kd->kd_dequeues) = ifiq->ifiq_dequeues;

	return (0);
}
#endif

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_fdrops = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifiq->ifiq_kstat = kstat_create(ifp->if_xname, 0,
	    "rxq", ifiq->ifiq_idx, KSTAT_T_KV, 0);
	KASSERT(ifiq->ifiq_kstat != NULL);
	kstat_set_mutex(ifiq->ifiq_kstat, &ifiq->ifiq_mtx);
	ifiq->ifiq_kstat->ks_softc = ifiq;
	ifiq->ifiq_kstat->ks_datalen = sizeof(ifiq_kstat_tpl);
	ifiq->ifiq_kstat->ks_copy = ifiq_kstat_copy;
	kstat_install(ifiq->ifiq_kstat);
#endif
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
#if NKSTAT > 0
	kstat_destroy(ifiq->ifiq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

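/*
 * ifiq_input() is the driver rx path: the packets on ml are stamped
 * with the interface index and rdomain, offered to bpf, appended to
 * the ifiq backlog, and ifiq_process() is scheduled on the softnet
 * taskq.  If the backlog already exceeds ifiq_maxlen_drop the list is
 * counted as qdrops and freed instead; in monitor mode packets are
 * freed once bpf has seen them.  The return value goes non-zero once
 * the backlog passes ifiq_maxlen_return, telling the caller the queue
 * is filling up.
 */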
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	uint64_t fdrops = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
				m_freem(m);
				fdrops++;
			} else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			ifiq->ifiq_fdrops += fdrops;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;
	ifiq->ifiq_fdrops += fdrops;

	len = ml_len(&ifiq->ifiq_ml);
	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR))) {
		if (len > ifiq_maxlen_drop)
			ifiq->ifiq_qdrops += ml_len(ml);
		else {
			ifiq->ifiq_enqueues++;
			ml_enlist(&ifiq->ifiq_ml, ml);
		}
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	struct ifnet *ifp = ifiq->ifiq_if;
#if NBPFILTER > 0
	caddr_t if_bpf = ifp->if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets++;
			ifiq->ifiq_bytes += m->m_pkthdr.len;
			ifiq->ifiq_fdrops++;
			mtx_leave(&ifiq->ifiq_mtx);

			m_freem(m);
			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets++;
	ifiq->ifiq_bytes += m->m_pkthdr.len;
	ifiq->ifiq_enqueues++;
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

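/*
 * ifiq_process() runs on the softnet taskq: it detaches the whole
 * backlog from the ifiq under ifiq_mtx and feeds it to
 * if_input_process() with no ifiq locks held.
 */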
static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_dequeues++;
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

int
net_ifiq_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error = EOPNOTSUPP;
/* pressure is disabled for 6.6-release */
#if 0
	int val;

	if (namelen != 1)
		return (EISDIR);

	switch (name[0]) {
	case NET_LINK_IFRXQ_PRESSURE_RETURN:
		val = ifiq_pressure_return;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (val < 1 || val > ifiq_pressure_drop)
			return (EINVAL);
		ifiq_pressure_return = val;
		break;
	case NET_LINK_IFRXQ_PRESSURE_DROP:
		val = ifiq_pressure_drop;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (ifiq_pressure_return > val)
			return (EINVAL);
		ifiq_pressure_drop = val;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
#endif

	return (error);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

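/*
 * priq keeps one mbuf_list per priority level.  When the ifq is full,
 * priq_enq() tries to make room by dropping the head of the first
 * non-empty list of lower priority than the new packet; if no such
 * list exists the new packet itself is handed back to the caller to
 * be dropped.
 */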
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
974 }
975