/*	$OpenBSD: ifq.c,v 1.47 2022/11/22 03:40:53 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "kstat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if NKSTAT > 0
#include <sys/kstat.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

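/*
 * priq is the default tx queue discipline (see ifq_init below).  The
 * ifq_ops members are listed in the same order as the prototypes
 * above: idx, enq, deq_begin, deq_commit, purge, alloc, free.
 */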
const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

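/*
 * ifq_serialize() queues a task on the ifq and makes sure only one
 * context runs the queued tasks at a time.  The first caller to find
 * ifq_serializer idle claims it and keeps dispatching tasks until the
 * list is empty; everyone else just leaves their task on the list and
 * returns.  Each task is copied and run with ifq_task_mtx released.
 */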
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

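/*
 * Defer small bursts of work to the softnet bundle task so starts get
 * coalesced; once the queue holds at least min(if_txmit, ifq_maxlen)
 * packets, cancel the pending bundle and kick the driver directly.
 */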
void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

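/*
 * ifq_barrier() waits for any work already in the serialiser to finish
 * by pushing a task through it that signals a cond.  If the serialiser
 * is idle there is nothing to wait for and the barrier returns early.
 */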
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	task_del(ifq->ifq_softnet, &ifq->ifq_bundle);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

#if NKSTAT > 0
struct ifq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;
	struct kstat_kv kd_maxqlen;
	struct kstat_kv kd_oactive;
};

static const struct ifq_kstat_data ifq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("maxqlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_INITIALIZER("oactive", KSTAT_KV_T_BOOL),
};

int
ifq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifqueue *ifq = ks->ks_softc;
	struct ifq_kstat_data *kd = dst;

	*kd = ifq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifq->ifq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifq->ifq_bytes;
	kstat_kv_u64(&kd->kd_qdrops) = ifq->ifq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifq->ifq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ifq->ifq_len;
	kstat_kv_u32(&kd->kd_maxqlen) = ifq->ifq_maxlen;
	kstat_kv_bool(&kd->kd_oactive) = ifq->ifq_oactive;

	return (0);
}
#endif

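/*
 * Set up a transmit queue: hook it to its interface and softnet taskq,
 * install priq as the default discipline, initialise the serialiser
 * state and, if kstat is compiled in, export the counters as a "txq"
 * kstat.
 */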
void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(ifp->if_index + idx);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifq->ifq_kstat = kstat_create(ifp->if_xname, 0,
	    "txq", ifq->ifq_idx, KSTAT_T_KV, 0);
	KASSERT(ifq->ifq_kstat != NULL);
	kstat_set_mutex(ifq->ifq_kstat, &ifq->ifq_mtx);
	ifq->ifq_kstat->ks_softc = ifq;
	ifq->ifq_kstat->ks_datalen = sizeof(ifq_kstat_tpl);
	ifq->ifq_kstat->ks_copy = ifq_kstat_copy;
	kstat_install(ifq->ifq_kstat);
#endif
}

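/*
 * Swap the queue discipline: allocate the new discipline state, purge
 * the old one under ifq_mtx, requeue the purged packets into the new
 * discipline (counting anything it refuses as a qdrop), then free the
 * old state and the dropped packets once the mutex is released.
 */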
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

#if NKSTAT > 0
	kstat_destroy(ifq->ifq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
		taskq_barrier(ifq->ifq_softnet);

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

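/*
 * Hand an mbuf to the discipline.  ifqop_enq returns NULL when the
 * packet was queued, the packet itself when it was rejected, or a
 * different (already queued) mbuf that was dropped to make room; the
 * counters and the ENOBUFS return are derived from which case happened.
 */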
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

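/*
 * Dequeue is split into begin/commit/rollback so a driver can look at
 * the head packet (and decide whether it fits on the hardware ring)
 * before taking it off the queue.  The discipline's cookie is stashed
 * in ph_cookie between the two calls, and ifq_mtx stays held until
 * commit or rollback.
 */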
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

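/*
 * Blocking dequeue: sleep on the ifq until a packet arrives.
 * Non-blocking (nbio) callers get EWOULDBLOCK if the queue is empty,
 * and EIO is returned once *alive is cleared while waiting.
 */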
int
ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
    const char *wmesg, volatile unsigned int *sleeping,
    volatile unsigned int *alive)
{
	struct mbuf *m;
	void *cookie;
	int error = 0;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 && nbio)
		error = EWOULDBLOCK;
	else {
		for (;;) {
			m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
			if (m != NULL) {
				ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
				ifq->ifq_len--;
				*mp = m;
				break;
			}

			(*sleeping)++;
			error = msleep_nsec(ifq, &ifq->ifq_mtx,
			    priority, wmesg, INFSLP);
			(*sleeping)--;
			if (error != 0)
				break;
			if (!(*alive)) {
				error = EIO;
				break;
			}
		}
	}
	ifq_deq_leave(ifq);

	return (error);
}

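/*
 * Report the length of the packet at the head of the queue without
 * dequeueing it, using a begin/rollback pair to peek.
 */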
int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	if (ifq_empty(ifq))
		return (0);

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

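/*
 * ifq_mfreem() and ifq_mfreeml() let a discipline drop packets while
 * ifq_mtx is already held; the mbufs are parked on ifq_free and only
 * actually freed in ifq_deq_leave() after the mutex is dropped.
 */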
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

#if NKSTAT > 0
struct ifiq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;

	struct kstat_kv kd_enqueues;
	struct kstat_kv kd_dequeues;
};

static const struct ifiq_kstat_data ifiq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),

	KSTAT_KV_INITIALIZER("enqueues",
	    KSTAT_KV_T_COUNTER64),
	KSTAT_KV_INITIALIZER("dequeues",
	    KSTAT_KV_T_COUNTER64),
};

int
ifiq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifiqueue *ifiq = ks->ks_softc;
	struct ifiq_kstat_data *kd = dst;

	*kd = ifiq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifiq->ifiq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifiq->ifiq_bytes;
	kstat_kv_u64(&kd->kd_qdrops) = ifiq->ifiq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifiq->ifiq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ml_len(&ifiq->ifiq_ml);

	kstat_kv_u64(&kd->kd_enqueues) = ifiq->ifiq_enqueues;
	kstat_kv_u64(&kd->kd_dequeues) = ifiq->ifiq_dequeues;

	return (0);
}
#endif

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifiq->ifiq_kstat = kstat_create(ifp->if_xname, 0,
	    "rxq", ifiq->ifiq_idx, KSTAT_T_KV, 0);
	KASSERT(ifiq->ifiq_kstat != NULL);
	kstat_set_mutex(ifiq->ifiq_kstat, &ifiq->ifiq_mtx);
	ifiq->ifiq_kstat->ks_softc = ifiq;
	ifiq->ifiq_kstat->ks_datalen = sizeof(ifiq_kstat_tpl);
	ifiq->ifiq_kstat->ks_copy = ifiq_kstat_copy;
	kstat_install(ifiq->ifiq_kstat);
#endif
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
#if NKSTAT > 0
	kstat_destroy(ifiq->ifiq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

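/*
 * ifiq_input() takes a list of packets from a driver: stamp each one
 * with the interface index and rdomain, feed them to bpf, account for
 * them, and append the survivors to ifiq_ml for the softnet task to
 * process.  If the backlog is already past ifiq_maxlen_drop the whole
 * list is dropped instead, and the return value tells the driver to
 * back off once the backlog passes ifiq_maxlen_return.
 */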
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	len = ml_len(&ifiq->ifiq_ml);
	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR))) {
		if (len > ifiq_maxlen_drop)
			ifiq->ifiq_qdrops += ml_len(ml);
		else {
			ifiq->ifiq_enqueues++;
			ml_enlist(&ifiq->ifiq_ml, ml);
		}
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

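/*
 * Softnet side of the rx queue: grab the whole backlog in one go under
 * ifiq_mtx and hand it to if_input_process() with the mutex released.
 */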
static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_dequeues++;
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

int
net_ifiq_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error = EOPNOTSUPP;
/* pressure is disabled for 6.6-release */
#if 0
	int val;

	if (namelen != 1)
		return (EISDIR);

	switch (name[0]) {
	case NET_LINK_IFRXQ_PRESSURE_RETURN:
		val = ifiq_pressure_return;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (val < 1 || val > ifiq_pressure_drop)
			return (EINVAL);
		ifiq_pressure_return = val;
		break;
	case NET_LINK_IFRXQ_PRESSURE_DROP:
		val = ifiq_pressure_drop;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (ifiq_pressure_return > val)
			return (EINVAL);
		ifiq_pressure_drop = val;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
#endif

	return (error);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

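/*
 * Enqueue onto the list for the packet's pf priority.  When the ifq is
 * already full, steal a slot by dropping a packet from a strictly lower
 * priority list; if there is none, reject the new packet by returning
 * it to the caller.
 */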
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

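/*
 * Dequeue scans the lists from the highest priority down and returns
 * the first packet it finds, passing the list back through the cookie
 * so commit can pop that exact packet.
 */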
struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}
939