xref: /netbsd-src/sys/net/lagg/if_laggproto.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /*	$NetBSD: if_laggproto.c,v 1.2 2021/05/24 13:43:21 thorpej Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5  *
6  * Copyright (c)2021 Internet Initiative Japan, Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: if_laggproto.c,v 1.2 2021/05/24 13:43:21 thorpej Exp $");
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 
37 #include <sys/evcnt.h>
38 #include <sys/kmem.h>
39 #include <sys/mbuf.h>
40 #include <sys/mutex.h>
41 #include <sys/pslist.h>
42 #include <sys/syslog.h>
43 #include <sys/workqueue.h>
44 
45 #include <net/if.h>
46 #include <net/if_ether.h>
47 #include <net/if_media.h>
48 
49 #include <net/lagg/if_lagg.h>
50 #include <net/lagg/if_laggproto.h>
51 
/*
 * Per-protocol state shared by all lagg protocol implementations
 * (failover, loadbalance, ...).  Allocated by lagg_proto_alloc().
 */
struct lagg_proto_softc {
	struct lagg_softc	*psc_softc;	/* back pointer to the owning lagg instance */
	struct pslist_head	 psc_ports;	/* started ports; read under pserialize */
	kmutex_t		 psc_lock;	/* serializes writers of this structure */
	pserialize_t		 psc_psz;	/* pserialize handle for psc_ports readers */
	size_t			 psc_ctxsiz;	/* size of psc_ctx; 0 when there is no context */
	void			*psc_ctx;	/* protocol-private context (lagg_failover / lagg_lb) */
	size_t			 psc_nactports;	/* count of ports whose link is currently up */
};
61 
/*
 * Locking notes:
 * - Fields of struct lagg_proto_softc are protected by
 *   psc_lock (an adaptive mutex)
 * - psc_ports is protected by pserialize (psc_psz)
 *   - Updates of psc_ports are serialized by sc_lock in
 *     struct lagg_softc
 * - Other locking notes are described in if_laggproto.h
 */
71 
/* Protocol-private context for the failover protocol. */
struct lagg_failover {
	bool		 fo_rx_all;	/* accept rx on all ports, not just the active one */
};
75 
/* One snapshot of the ports usable for loadbalance distribution. */
struct lagg_portmap {
	struct lagg_port	*pm_ports[LAGG_MAX_PORTS];
	size_t			 pm_nports;	/* number of valid entries in pm_ports */
};
80 
/*
 * Double-buffered port maps: writers build the inactive map and then
 * flip maps_activepmap so pserialize readers never see a partial update.
 */
struct lagg_portmaps {
	struct lagg_portmap	 maps_pmap[2];
	size_t			 maps_activepmap;	/* index (0 or 1) of the active map */
};
85 
/* Protocol-private context for the loadbalance protocol. */
struct lagg_lb {
	struct lagg_portmaps	 lb_pmaps;	/* double-buffered distribution maps */
};
89 
/* Per-port state kept by the common protocol code (lp->lp_proto_ctx). */
struct lagg_proto_port {
	struct pslist_entry	 lpp_entry;	/* linkage on psc_ports */
	struct lagg_port	*lpp_laggport;	/* back pointer to the lagg port */
	bool			 lpp_active;	/* link is up; read with atomic_load_relaxed */
};
95 
/* Writer-side mutual exclusion for a protocol instance. */
#define LAGG_PROTO_LOCK(_psc)	mutex_enter(&(_psc)->psc_lock)
#define LAGG_PROTO_UNLOCK(_psc)	mutex_exit(&(_psc)->psc_lock)
#define LAGG_PROTO_LOCKED(_psc)	mutex_owned(&(_psc)->psc_lock)
99 
/* Forward declarations of file-local helpers. */
static struct lagg_proto_softc *
		lagg_proto_alloc(lagg_proto, struct lagg_softc *);
static void	lagg_proto_free(struct lagg_proto_softc *);
static void	lagg_proto_insert_port(struct lagg_proto_softc *,
		    struct lagg_proto_port *);
static void	lagg_proto_remove_port(struct lagg_proto_softc *,
		    struct lagg_proto_port *);
static struct lagg_port *
		lagg_link_active(struct lagg_proto_softc *psc,
		    struct lagg_proto_port *, struct psref *);
110 
111 static inline struct lagg_portmap *
112 lagg_portmap_active(struct lagg_portmaps *maps)
113 {
114 	size_t i;
115 
116 	i = atomic_load_consume(&maps->maps_activepmap);
117 
118 	return &maps->maps_pmap[i];
119 }
120 
121 static inline struct lagg_portmap *
122 lagg_portmap_next(struct lagg_portmaps *maps)
123 {
124 	size_t i;
125 
126 	i = atomic_load_consume(&maps->maps_activepmap);
127 	i &= 0x1;
128 	i ^= 0x1;
129 
130 	return &maps->maps_pmap[i];
131 }
132 
133 static inline void
134 lagg_portmap_switch(struct lagg_portmaps *maps)
135 {
136 	size_t i;
137 
138 	i = atomic_load_consume(&maps->maps_activepmap);
139 	i &= 0x1;
140 	i ^= 0x1;
141 
142 	atomic_store_release(&maps->maps_activepmap, i);
143 }
144 
145 static struct lagg_proto_softc *
146 lagg_proto_alloc(lagg_proto pr, struct lagg_softc *sc)
147 {
148 	struct lagg_proto_softc *psc;
149 	size_t ctxsiz;
150 
151 	switch (pr) {
152 	case LAGG_PROTO_FAILOVER:
153 		ctxsiz = sizeof(struct lagg_failover);
154 		break;
155 	case LAGG_PROTO_LOADBALANCE:
156 		ctxsiz = sizeof(struct lagg_lb);
157 		break;
158 	default:
159 		ctxsiz = 0;
160 	}
161 
162 	psc = kmem_zalloc(sizeof(*psc), KM_NOSLEEP);
163 	if (psc == NULL)
164 		return NULL;
165 
166 	if (ctxsiz > 0) {
167 		psc->psc_ctx = kmem_zalloc(ctxsiz, KM_NOSLEEP);
168 		if (psc->psc_ctx == NULL) {
169 			kmem_free(psc, sizeof(*psc));
170 			return NULL;
171 		}
172 
173 		psc->psc_ctxsiz = ctxsiz;
174 	}
175 
176 	PSLIST_INIT(&psc->psc_ports);
177 	psc->psc_psz = pserialize_create();
178 	mutex_init(&psc->psc_lock, MUTEX_DEFAULT, IPL_SOFTNET);
179 	psc->psc_softc = sc;
180 
181 	return psc;
182 }
183 
184 static void
185 lagg_proto_free(struct lagg_proto_softc *psc)
186 {
187 
188 	pserialize_destroy(psc->psc_psz);
189 	mutex_destroy(&psc->psc_lock);
190 
191 	if (psc->psc_ctxsiz > 0)
192 		kmem_free(psc->psc_ctx, psc->psc_ctxsiz);
193 
194 	kmem_free(psc, sizeof(*psc));
195 }
196 
/*
 * Find an active (link-up) port, preferring ports at or after "pport"
 * on the list; "pport" may be NULL to search the whole list.  On
 * success a psref is acquired on the returned port via
 * lagg_port_getref(); the caller must release it with
 * lagg_port_putref().  Returns NULL when no port is active.
 * The whole lookup runs in a pserialize read section.
 */
static struct lagg_port *
lagg_link_active(struct lagg_proto_softc *psc,
    struct lagg_proto_port *pport, struct psref *psref)
{
	struct lagg_port *lp;
	int s;

	lp = NULL;
	s = pserialize_read_enter();

	/* First pass: scan forward from the preferred port. */
	for (;pport != NULL;
	    pport = PSLIST_READER_NEXT(pport,
	    struct lagg_proto_port, lpp_entry)) {
		if (atomic_load_relaxed(&pport->lpp_active)) {
			lp = pport->lpp_laggport;
			goto done;
		}
	}

	/* Second pass: fall back to scanning from the head of the list. */
	PSLIST_READER_FOREACH(pport, &psc->psc_ports,
	    struct lagg_proto_port, lpp_entry) {
		if (atomic_load_relaxed(&pport->lpp_active)) {
			lp = pport->lpp_laggport;
			break;
		}
	}
done:
	/* Take the reference inside the read section, before readers drain. */
	if (lp != NULL)
		lagg_port_getref(lp, psref);
	pserialize_read_exit(s);

	return lp;
}
230 
231 int
232 lagg_common_allocport(struct lagg_proto_softc *psc, struct lagg_port *lp)
233 {
234 	struct lagg_proto_port *pport;
235 
236 	KASSERT(LAGG_LOCKED(psc->psc_softc));
237 
238 	pport = kmem_zalloc(sizeof(*pport), KM_NOSLEEP);
239 	if (pport == NULL)
240 		return ENOMEM;
241 
242 	PSLIST_ENTRY_INIT(pport, lpp_entry);
243 	pport->lpp_laggport = lp;
244 	lp->lp_proto_ctx = (void *)pport;
245 	return 0;
246 }
247 
248 void
249 lagg_common_freeport(struct lagg_proto_softc *psc, struct lagg_port *lp)
250 {
251 	struct lagg_proto_port *pport;
252 
253 	pport = lp->lp_proto_ctx;
254 	lp->lp_proto_ctx = NULL;
255 
256 	kmem_free(pport, sizeof(*pport));
257 }
258 
/*
 * Insert "pport" into psc_ports, keeping the list sorted by ascending
 * lp_prio (lower priority value closer to the head).  Uses the pslist
 * writer primitives under psc_lock so concurrent pserialize readers
 * always observe a consistent list.
 */
static void
lagg_proto_insert_port(struct lagg_proto_softc *psc,
    struct lagg_proto_port *pport)
{
	struct lagg_proto_port *pport0;
	struct lagg_port *lp, *lp0;
	bool insert_after;

	insert_after = false;
	lp = pport->lpp_laggport;

	LAGG_PROTO_LOCK(psc);
	/*
	 * Find the first entry with a strictly higher priority value;
	 * if we run off the end instead, remember to append.
	 */
	PSLIST_WRITER_FOREACH(pport0, &psc->psc_ports,
	    struct lagg_proto_port, lpp_entry) {
		lp0 = pport0->lpp_laggport;
		if (lp0->lp_prio > lp->lp_prio)
			break;

		if (PSLIST_WRITER_NEXT(pport0,
		    struct lagg_proto_port, lpp_entry) == NULL) {
			insert_after = true;
			break;
		}
	}

	if (pport0 == NULL) {
		/* Empty list. */
		PSLIST_WRITER_INSERT_HEAD(&psc->psc_ports, pport,
		    lpp_entry);
	} else if (insert_after) {
		/* All existing entries have lower-or-equal priority value. */
		PSLIST_WRITER_INSERT_AFTER(pport0, pport, lpp_entry);
	} else {
		/* Insert in front of the first higher-priority entry. */
		PSLIST_WRITER_INSERT_BEFORE(pport0, pport, lpp_entry);
	}
	LAGG_PROTO_UNLOCK(psc);
}
294 
/*
 * Unlink "pport" from psc_ports and wait, via pserialize_perform(),
 * until all pserialize readers that might still see the entry have
 * drained, so the caller may safely reuse or free it afterwards.
 */
static void
lagg_proto_remove_port(struct lagg_proto_softc *psc,
    struct lagg_proto_port *pport)
{

	LAGG_PROTO_LOCK(psc);
	PSLIST_WRITER_REMOVE(pport, lpp_entry);
	pserialize_perform(psc->psc_psz);
	LAGG_PROTO_UNLOCK(psc);
}
305 
306 void
307 lagg_common_startport(struct lagg_proto_softc *psc, struct lagg_port *lp)
308 {
309 	struct lagg_proto_port *pport;
310 
311 	pport = lp->lp_proto_ctx;
312 	lagg_proto_insert_port(psc, pport);
313 
314 	lagg_common_linkstate(psc, lp);
315 }
316 
/*
 * Stop a port: remove it from the protocol's port list (waiting for
 * readers to drain) and, if it was counted as active, drop the
 * active-port count, declaring the lagg interface link down when no
 * active ports remain.
 *
 * NOTE(review): psc_nactports and lpp_active are updated here without
 * psc_lock — presumably serialized by the caller holding sc_lock (see
 * the locking notes above); confirm against callers in if_lagg.c.
 */
void
lagg_common_stopport(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
	struct lagg_proto_port *pport;
	struct ifnet *ifp;

	pport = lp->lp_proto_ctx;
	/* After this returns, no reader can still reach pport. */
	lagg_proto_remove_port(psc, pport);

	if (pport->lpp_active) {
		if (psc->psc_nactports > 0)
			psc->psc_nactports--;

		if (psc->psc_nactports == 0) {
			/* Last active port is gone: interface link goes down. */
			ifp = &psc->psc_softc->sc_if;
			if_link_state_change(ifp, LINK_STATE_DOWN);
		}

		pport->lpp_active = false;
	}
}
338 
/*
 * Re-evaluate the link state of one port and propagate it to the
 * aggregate: the first port to come up raises the lagg interface
 * link, the last port to go down lowers it.  The final atomic store
 * publishes lpp_active to lock-free readers (lagg_link_active()).
 */
void
lagg_common_linkstate(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
	struct lagg_proto_port *pport;
	struct ifnet *ifp;
	bool is_active;

	pport = lp->lp_proto_ctx;
	is_active = lagg_portactive(lp);

	/* Nothing to do when the state did not change. */
	if (pport->lpp_active == is_active)
		return;

	ifp = &psc->psc_softc->sc_if;
	if (is_active) {
		psc->psc_nactports++;
		if (psc->psc_nactports == 1)
			if_link_state_change(ifp, LINK_STATE_UP);
	} else {
		if (psc->psc_nactports > 0)
			psc->psc_nactports--;

		if (psc->psc_nactports == 0)
			if_link_state_change(ifp, LINK_STATE_DOWN);
	}

	atomic_store_relaxed(&pport->lpp_active, is_active);
}
367 
/* Tear down a protocol instance created with lagg_proto_alloc(). */
void
lagg_common_detach(struct lagg_proto_softc *psc)
{

	lagg_proto_free(psc);
}
374 
375 int
376 lagg_none_attach(struct lagg_softc *sc, struct lagg_proto_softc **pscp)
377 {
378 
379 	*pscp = NULL;
380 	return 0;
381 }
382 
383 int
384 lagg_none_up(struct lagg_proto_softc *psc __unused)
385 {
386 
387 	return EBUSY;
388 }
389 
390 int
391 lagg_fail_attach(struct lagg_softc *sc, struct lagg_proto_softc **xpsc)
392 {
393 	struct lagg_proto_softc *psc;
394 	struct lagg_failover *fovr;
395 
396 	psc = lagg_proto_alloc(LAGG_PROTO_FAILOVER, sc);
397 	if (psc == NULL)
398 		return ENOMEM;
399 
400 	fovr = psc->psc_ctx;
401 	fovr->fo_rx_all = true;
402 
403 	*xpsc = psc;
404 	return 0;
405 }
406 
407 int
408 lagg_fail_transmit(struct lagg_proto_softc *psc, struct mbuf *m)
409 {
410 	struct ifnet *ifp;
411 	struct lagg_port *lp;
412 	struct psref psref;
413 
414 	lp = lagg_link_active(psc, NULL, &psref);
415 	if (lp == NULL) {
416 		ifp = &psc->psc_softc->sc_if;
417 		if_statinc(ifp, if_oerrors);
418 		m_freem(m);
419 		return ENOENT;
420 	}
421 
422 	lagg_enqueue(psc->psc_softc, lp, m);
423 	lagg_port_putref(lp, &psref);
424 	return 0;
425 }
426 
/*
 * Failover input filter.  When fo_rx_all is set, packets are accepted
 * from any port; otherwise only packets arriving on the currently
 * active port pass.  Returns the mbuf to continue input processing,
 * or NULL after dropping it (counted as an input error).
 */
struct mbuf *
lagg_fail_input(struct lagg_proto_softc *psc, struct lagg_port *lp,
    struct mbuf *m)
{
	struct lagg_failover *fovr;
	struct lagg_port *lp0;
	struct ifnet *ifp;
	struct psref psref;

	fovr = psc->psc_ctx;
	if (atomic_load_relaxed(&fovr->fo_rx_all))
		return m;

	/* Accept only if this port is the active one. */
	lp0 = lagg_link_active(psc, NULL, &psref);
	if (lp0 == NULL) {
		goto drop;
	}

	if (lp0 != lp) {
		/* Release the psref taken by lagg_link_active() first. */
		lagg_port_putref(lp0, &psref);
		goto drop;
	}

	lagg_port_putref(lp0, &psref);

	return m;
drop:
	ifp = &psc->psc_softc->sc_if;
	if_statinc(ifp, if_ierrors);
	m_freem(m);
	return NULL;
}
459 
/*
 * Report failover port status flags into "resp": ACTIVE when the link
 * is up, COLLECTING when the port may receive (always true with
 * fo_rx_all), and COLLECTING|DISTRIBUTING when this port is the one
 * currently selected for transmission.
 */
void
lagg_fail_portstat(struct lagg_proto_softc *psc, struct lagg_port *lp,
    struct laggreqport *resp)
{
	struct lagg_failover *fovr;
	struct lagg_proto_port *pport;
	struct lagg_port *lp0;
	struct psref psref;

	fovr = psc->psc_ctx;
	pport = lp->lp_proto_ctx;

	if (pport->lpp_active) {
		SET(resp->rp_flags, LAGG_PORT_ACTIVE);
		if (fovr->fo_rx_all) {
			SET(resp->rp_flags, LAGG_PORT_COLLECTING);
		}

		/* Is this port the currently selected (transmitting) one? */
		lp0 = lagg_link_active(psc, NULL, &psref);
		if (lp0 == lp) {
			SET(resp->rp_flags,
			    LAGG_PORT_COLLECTING | LAGG_PORT_DISTRIBUTING);
		}
		if (lp0 != NULL)
			lagg_port_putref(lp0, &psref);
	}
}
487 
488 int
489 lagg_fail_ioctl(struct lagg_proto_softc *psc, struct laggreqproto *lreq)
490 {
491 	struct lagg_failover *fovr;
492 	struct laggreq_fail *rpfail;
493 	int error;
494 	bool set;
495 
496 	error = 0;
497 	fovr = psc->psc_ctx;
498 	rpfail = &lreq->rp_fail;
499 
500 	switch (rpfail->command) {
501 	case LAGGIOC_FAILSETFLAGS:
502 	case LAGGIOC_FAILCLRFLAGS:
503 		set = (rpfail->command == LAGGIOC_FAILSETFLAGS) ?
504 			true : false;
505 
506 		if (ISSET(rpfail->flags, LAGGREQFAIL_RXALL))
507 			fovr->fo_rx_all = set;
508 		break;
509 	default:
510 		error = ENOTTY;
511 		break;
512 	}
513 
514 	return error;
515 }
516 
517 int
518 lagg_lb_attach(struct lagg_softc *sc, struct lagg_proto_softc **xpsc)
519 {
520 	struct lagg_proto_softc *psc;
521 	struct lagg_lb *lb;
522 
523 	psc = lagg_proto_alloc(LAGG_PROTO_LOADBALANCE, sc);
524 	if (psc == NULL)
525 		return ENOMEM;
526 
527 	lb = psc->psc_ctx;
528 	lb->lb_pmaps.maps_activepmap = 0;
529 
530 	*xpsc = psc;
531 	return 0;
532 }
533 
/*
 * Start a loadbalance port.  After the common start, the port is added
 * to the distribution map using the double-buffer scheme: copy the
 * active map into the inactive one, append the new port there, flip
 * the maps, then wait for readers of the old map to drain.
 */
void
lagg_lb_startport(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
	struct lagg_lb *lb;
	struct lagg_portmap *pm_act, *pm_next;
	size_t n;

	lb = psc->psc_ctx;
	lagg_common_startport(psc, lp);

	LAGG_PROTO_LOCK(psc);
	pm_act = lagg_portmap_active(&lb->lb_pmaps);
	pm_next = lagg_portmap_next(&lb->lb_pmaps);

	/* Build the new map off to the side, starting from the active one. */
	*pm_next = *pm_act;

	n = pm_next->pm_nports;
	pm_next->pm_ports[n] = lp;

	n++;
	pm_next->pm_nports = n;

	/* Publish the new map and wait until no reader uses the old one. */
	lagg_portmap_switch(&lb->lb_pmaps);
	pserialize_perform(psc->psc_psz);
	LAGG_PROTO_UNLOCK(psc);
}
560 
/*
 * Stop a loadbalance port.  The distribution map is rebuilt without
 * "lp" in the inactive buffer, the maps are flipped, and readers of
 * the old map are drained before the common stop runs.
 */
void
lagg_lb_stopport(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
	struct lagg_lb *lb;
	struct lagg_portmap *pm_act, *pm_next;
	size_t i, n;

	lb = psc->psc_ctx;

	LAGG_PROTO_LOCK(psc);
	pm_act = lagg_portmap_active(&lb->lb_pmaps);
	pm_next = lagg_portmap_next(&lb->lb_pmaps);
	n = 0;

	/* Copy every port except the one being stopped. */
	for (i = 0; i < pm_act->pm_nports; i++) {
		if (pm_act->pm_ports[i] == lp)
			continue;

		pm_next->pm_ports[n] = pm_act->pm_ports[i];
		n++;
	}

	/*
	 * NOTE(review): pm_next->pm_nports is not set to "n" here before
	 * the switch; it still holds the count from an earlier snapshot.
	 * Verify against later revisions whether this is intentional.
	 */
	lagg_portmap_switch(&lb->lb_pmaps);
	pserialize_perform(psc->psc_psz);
	LAGG_PROTO_UNLOCK(psc);

	lagg_common_stopport(psc, lp);
}
589 
590 int
591 lagg_lb_transmit(struct lagg_proto_softc *psc, struct mbuf *m)
592 {
593 	struct lagg_lb *lb;
594 	struct lagg_portmap *pm;
595 	struct lagg_port *lp, *lp0;
596 	struct ifnet *ifp;
597 	struct psref psref;
598 	uint32_t hash;
599 	int s;
600 
601 	lb = psc->psc_ctx;
602 	hash  = lagg_hashmbuf(psc->psc_softc, m);
603 
604 	s = pserialize_read_enter();
605 
606 	pm = lagg_portmap_active(&lb->lb_pmaps);
607 	hash %= pm->pm_nports;
608 	lp0 = pm->pm_ports[hash];
609 	lp = lagg_link_active(psc, lp0->lp_proto_ctx, &psref);
610 
611 	pserialize_read_exit(s);
612 
613 	if (__predict_false(lp == NULL)) {
614 		ifp = &psc->psc_softc->sc_if;
615 		if_statinc(ifp, if_oerrors);
616 		m_freem(m);
617 		return ENOENT;
618 	}
619 
620 	lagg_enqueue(psc->psc_softc, lp, m);
621 	lagg_port_putref(lp, &psref);
622 
623 	return 0;
624 }
625 
626 struct mbuf *
627 lagg_lb_input(struct lagg_proto_softc *psc __unused,
628     struct lagg_port *lp __unused, struct mbuf *m)
629 {
630 
631 	return m;
632 }
633 
634 void
635 lagg_lb_portstat(struct lagg_proto_softc *psc, struct lagg_port *lp,
636     struct laggreqport *resp)
637 {
638 	struct lagg_proto_port *pport;
639 
640 	pport = lp->lp_proto_ctx;
641 
642 	if (pport->lpp_active) {
643 		SET(resp->rp_flags, LAGG_PORT_ACTIVE |
644 		    LAGG_PORT_COLLECTING | LAGG_PORT_DISTRIBUTING);
645 	}
646 }
647