xref: /dflybsd-src/sys/net/if_ethersubr.c (revision c7c26905e41d70fe0ea9d02c5af1fc0a688408d4)
1 /*
2  * Copyright (c) 1982, 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
34  * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.70.2.33 2003/04/28 15:45:53 archie Exp $
35  */
36 
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_ipx.h"
40 #include "opt_mpls.h"
41 #include "opt_netgraph.h"
42 #include "opt_carp.h"
43 #include "opt_rss.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/globaldata.h>
48 #include <sys/kernel.h>
49 #include <sys/ktr.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/msgport.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/thread.h>
58 
59 #include <sys/thread2.h>
60 #include <sys/mplock2.h>
61 
62 #include <net/if.h>
63 #include <net/netisr.h>
64 #include <net/route.h>
65 #include <net/if_llc.h>
66 #include <net/if_dl.h>
67 #include <net/if_types.h>
68 #include <net/ifq_var.h>
69 #include <net/bpf.h>
70 #include <net/ethernet.h>
71 #include <net/vlan/if_vlan_ether.h>
72 #include <net/vlan/if_vlan_var.h>
73 #include <net/netmsg2.h>
74 
75 #if defined(INET) || defined(INET6)
76 #include <netinet/in.h>
77 #include <netinet/ip_var.h>
78 #include <netinet/tcp_var.h>
79 #include <netinet/if_ether.h>
80 #include <netinet/ip_flow.h>
81 #include <net/ipfw/ip_fw.h>
82 #include <net/dummynet/ip_dummynet.h>
83 #endif
84 #ifdef INET6
85 #include <netinet6/nd6.h>
86 #endif
87 
88 #ifdef CARP
89 #include <netinet/ip_carp.h>
90 #endif
91 
92 #ifdef IPX
93 #include <netproto/ipx/ipx.h>
94 #include <netproto/ipx/ipx_if.h>
95 int (*ef_inputp)(struct ifnet*, const struct ether_header *eh, struct mbuf *m);
96 int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
97 		  short *tp, int *hlen);
98 #endif
99 
100 #ifdef MPLS
101 #include <netproto/mpls/mpls.h>
102 #endif
103 
104 /* netgraph node hooks for ng_ether(4) */
105 void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
106 void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
107 int	(*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
108 void	(*ng_ether_attach_p)(struct ifnet *ifp);
109 void	(*ng_ether_detach_p)(struct ifnet *ifp);
110 
111 void	(*vlan_input_p)(struct mbuf *);
112 
113 static int ether_output(struct ifnet *, struct mbuf *, struct sockaddr *,
114 			struct rtentry *);
115 static void ether_restore_header(struct mbuf **, const struct ether_header *,
116 				 const struct ether_header *);
117 static int ether_characterize(struct mbuf **);
118 
119 /*
120  * if_bridge support
121  */
122 struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
123 int (*bridge_output_p)(struct ifnet *, struct mbuf *);
124 void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
125 struct ifnet *(*bridge_interface_p)(void *if_bridge);
126 
127 static int ether_resolvemulti(struct ifnet *, struct sockaddr **,
128 			      struct sockaddr *);
129 
130 const uint8_t etherbroadcastaddr[ETHER_ADDR_LEN] = {
131 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
132 };
133 
134 #define gotoerr(e) do { error = (e); goto bad; } while (0)
135 #define IFP2AC(ifp) ((struct arpcom *)(ifp))
136 
137 static boolean_t ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst,
138 				struct ip_fw **rule,
139 				const struct ether_header *eh);
140 
141 static int ether_ipfw;
142 static u_long ether_restore_hdr;
143 static u_long ether_prepend_hdr;
144 static u_long ether_input_wronghash;
145 static int ether_debug;
146 
147 #ifdef RSS_DEBUG
148 static u_long ether_pktinfo_try;
149 static u_long ether_pktinfo_hit;
150 static u_long ether_rss_nopi;
151 static u_long ether_rss_nohash;
152 static u_long ether_input_requeue;
153 #endif
154 
155 SYSCTL_DECL(_net_link);
156 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
157 SYSCTL_INT(_net_link_ether, OID_AUTO, debug, CTLFLAG_RW,
158     &ether_debug, 0, "Ether debug");
159 SYSCTL_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
160     &ether_ipfw, 0, "Pass ether pkts through firewall");
161 SYSCTL_ULONG(_net_link_ether, OID_AUTO, restore_hdr, CTLFLAG_RW,
162     &ether_restore_hdr, 0, "# of ether header restoration");
163 SYSCTL_ULONG(_net_link_ether, OID_AUTO, prepend_hdr, CTLFLAG_RW,
164     &ether_prepend_hdr, 0,
165     "# of ether header restoration which prepends mbuf");
166 SYSCTL_ULONG(_net_link_ether, OID_AUTO, input_wronghash, CTLFLAG_RW,
167     &ether_input_wronghash, 0, "# of input packets with wrong hash");
168 #ifdef RSS_DEBUG
169 SYSCTL_ULONG(_net_link_ether, OID_AUTO, rss_nopi, CTLFLAG_RW,
170     &ether_rss_nopi, 0, "# of packets do not have pktinfo");
171 SYSCTL_ULONG(_net_link_ether, OID_AUTO, rss_nohash, CTLFLAG_RW,
172     &ether_rss_nohash, 0, "# of packets do not have hash");
173 SYSCTL_ULONG(_net_link_ether, OID_AUTO, pktinfo_try, CTLFLAG_RW,
174     &ether_pktinfo_try, 0,
175     "# of tries to find packets' msgport using pktinfo");
176 SYSCTL_ULONG(_net_link_ether, OID_AUTO, pktinfo_hit, CTLFLAG_RW,
177     &ether_pktinfo_hit, 0,
178     "# of packets whose msgport are found using pktinfo");
179 SYSCTL_ULONG(_net_link_ether, OID_AUTO, input_requeue, CTLFLAG_RW,
180     &ether_input_requeue, 0, "# of input packets gets requeued");
181 #endif
182 
183 #define ETHER_KTR_STR		"ifp=%p"
184 #define ETHER_KTR_ARGS	struct ifnet *ifp
185 #ifndef KTR_ETHERNET
186 #define KTR_ETHERNET		KTR_ALL
187 #endif
188 KTR_INFO_MASTER(ether);
189 KTR_INFO(KTR_ETHERNET, ether, pkt_beg, 0, ETHER_KTR_STR, ETHER_KTR_ARGS);
190 KTR_INFO(KTR_ETHERNET, ether, pkt_end, 1, ETHER_KTR_STR, ETHER_KTR_ARGS);
191 KTR_INFO(KTR_ETHERNET, ether, disp_beg, 2, ETHER_KTR_STR, ETHER_KTR_ARGS);
192 KTR_INFO(KTR_ETHERNET, ether, disp_end, 3, ETHER_KTR_STR, ETHER_KTR_ARGS);
193 #define logether(name, arg)	KTR_LOG(ether_ ## name, arg)
194 
195 /*
196  * Ethernet output routine.
197  * Encapsulate a packet of type family for the local net.
198  * Use trailer local net encapsulation if enough data in first
199  * packet leaves a multiple of 512 bytes of data in remainder.
200  * Assumes that ifp is actually pointer to arpcom structure.
201  */
202 static int
203 ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
204 	     struct rtentry *rt)
205 {
206 	struct ether_header *eh, *deh;
207 	u_char *edst;
208 	int loop_copy = 0;
209 	int hlen = ETHER_HDR_LEN;	/* link layer header length */
210 	struct arpcom *ac = IFP2AC(ifp);
211 	int error;
212 
213 	ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
214 
215 	if (ifp->if_flags & IFF_MONITOR)
216 		gotoerr(ENETDOWN);
217 	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING))
218 		gotoerr(ENETDOWN);
219 
220 	M_PREPEND(m, sizeof(struct ether_header), MB_DONTWAIT);
221 	if (m == NULL)
222 		return (ENOBUFS);
223 	m->m_pkthdr.csum_lhlen = sizeof(struct ether_header);
224 	eh = mtod(m, struct ether_header *);
225 	edst = eh->ether_dhost;
226 
227 	/*
228 	 * Fill in the destination ethernet address and frame type.
229 	 */
230 	switch (dst->sa_family) {
231 #ifdef INET
232 	case AF_INET:
233 		if (!arpresolve(ifp, rt, m, dst, edst))
234 			return (0);	/* if not yet resolved */
235 #ifdef MPLS
236 		if (m->m_flags & M_MPLSLABELED)
237 			eh->ether_type = htons(ETHERTYPE_MPLS);
238 		else
239 #endif
240 			eh->ether_type = htons(ETHERTYPE_IP);
241 		break;
242 #endif
243 #ifdef INET6
244 	case AF_INET6:
245 		if (!nd6_storelladdr(&ac->ac_if, rt, m, dst, edst))
246 			return (0);		/* Something bad happenned. */
247 		eh->ether_type = htons(ETHERTYPE_IPV6);
248 		break;
249 #endif
250 #ifdef IPX
251 	case AF_IPX:
252 		if (ef_outputp != NULL) {
253 			/*
254 			 * Hold BGL and recheck ef_outputp
255 			 */
256 			get_mplock();
257 			if (ef_outputp != NULL) {
258 				error = ef_outputp(ifp, &m, dst,
259 						   &eh->ether_type, &hlen);
260 				rel_mplock();
261 				if (error)
262 					goto bad;
263 				else
264 					break;
265 			}
266 			rel_mplock();
267 		}
268 		eh->ether_type = htons(ETHERTYPE_IPX);
269 		bcopy(&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
270 		      edst, ETHER_ADDR_LEN);
271 		break;
272 #endif
273 	case pseudo_AF_HDRCMPLT:
274 	case AF_UNSPEC:
275 		loop_copy = -1; /* if this is for us, don't do it */
276 		deh = (struct ether_header *)dst->sa_data;
277 		memcpy(edst, deh->ether_dhost, ETHER_ADDR_LEN);
278 		eh->ether_type = deh->ether_type;
279 		break;
280 
281 	default:
282 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
283 		gotoerr(EAFNOSUPPORT);
284 	}
285 
286 	if (dst->sa_family == pseudo_AF_HDRCMPLT)	/* unlikely */
287 		memcpy(eh->ether_shost,
288 		       ((struct ether_header *)dst->sa_data)->ether_shost,
289 		       ETHER_ADDR_LEN);
290 	else
291 		memcpy(eh->ether_shost, ac->ac_enaddr, ETHER_ADDR_LEN);
292 
293 	/*
294 	 * Bridges require special output handling.
295 	 */
296 	if (ifp->if_bridge) {
297 		KASSERT(bridge_output_p != NULL,
298 			("%s: if_bridge not loaded!", __func__));
299 		return bridge_output_p(ifp, m);
300 	}
301 
302 	/*
303 	 * If a simplex interface, and the packet is being sent to our
304 	 * Ethernet address or a broadcast address, loopback a copy.
305 	 * XXX To make a simplex device behave exactly like a duplex
306 	 * device, we should copy in the case of sending to our own
307 	 * ethernet address (thus letting the original actually appear
308 	 * on the wire). However, we don't do that here for security
309 	 * reasons and compatibility with the original behavior.
310 	 */
311 	if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
312 		int csum_flags = 0;
313 
314 		if (m->m_pkthdr.csum_flags & CSUM_IP)
315 			csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
316 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
317 			csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
318 		if ((m->m_flags & M_BCAST) || (loop_copy > 0)) {
319 			struct mbuf *n;
320 
321 			if ((n = m_copypacket(m, MB_DONTWAIT)) != NULL) {
322 				n->m_pkthdr.csum_flags |= csum_flags;
323 				if (csum_flags & CSUM_DATA_VALID)
324 					n->m_pkthdr.csum_data = 0xffff;
325 				if_simloop(ifp, n, dst->sa_family, hlen);
326 			} else
327 				ifp->if_iqdrops++;
328 		} else if (bcmp(eh->ether_dhost, eh->ether_shost,
329 				ETHER_ADDR_LEN) == 0) {
330 			m->m_pkthdr.csum_flags |= csum_flags;
331 			if (csum_flags & CSUM_DATA_VALID)
332 				m->m_pkthdr.csum_data = 0xffff;
333 			if_simloop(ifp, m, dst->sa_family, hlen);
334 			return (0);	/* XXX */
335 		}
336 	}
337 
338 #ifdef CARP
339 	if (ifp->if_type == IFT_CARP) {
340 		ifp = carp_parent(ifp);
341 		if (ifp == NULL)
342 			gotoerr(ENETUNREACH);
343 
344 		ac = IFP2AC(ifp);
345 
346 		/*
347 		 * Check precondition again
348 		 */
349 		ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
350 
351 		if (ifp->if_flags & IFF_MONITOR)
352 			gotoerr(ENETDOWN);
353 		if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
354 		    (IFF_UP | IFF_RUNNING))
355 			gotoerr(ENETDOWN);
356 	}
357 #endif
358 
359 	/* Handle ng_ether(4) processing, if any */
360 	if (ng_ether_output_p != NULL) {
361 		/*
362 		 * Hold BGL and recheck ng_ether_output_p
363 		 */
364 		get_mplock();
365 		if (ng_ether_output_p != NULL) {
366 			if ((error = ng_ether_output_p(ifp, &m)) != 0) {
367 				rel_mplock();
368 				goto bad;
369 			}
370 			if (m == NULL) {
371 				rel_mplock();
372 				return (0);
373 			}
374 		}
375 		rel_mplock();
376 	}
377 
378 	/* Continue with link-layer output */
379 	return ether_output_frame(ifp, m);
380 
381 bad:
382 	m_freem(m);
383 	return (error);
384 }
385 
386 /*
387  * Returns the bridge interface an ifp is associated
388  * with.
389  *
390  * Only call if ifp->if_bridge != NULL.
391  */
392 struct ifnet *
393 ether_bridge_interface(struct ifnet *ifp)
394 {
395 	if (bridge_interface_p)
396 		return(bridge_interface_p(ifp->if_bridge));
397 	return (ifp);
398 }
399 
400 /*
401  * Ethernet link layer output routine to send a raw frame to the device.
402  *
403  * This assumes that the 14 byte Ethernet header is present and contiguous
404  * in the first mbuf.
405  */
406 int
407 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
408 {
409 	struct ip_fw *rule = NULL;
410 	int error = 0;
411 	struct altq_pktattr pktattr;
412 
413 	ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
414 
415 	if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
416 		struct m_tag *mtag;
417 
418 		/* Extract info from dummynet tag */
419 		mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
420 		KKASSERT(mtag != NULL);
421 		rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
422 		KKASSERT(rule != NULL);
423 
424 		m_tag_delete(m, mtag);
425 		m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
426 	}
427 
428 	if (ifq_is_enabled(&ifp->if_snd))
429 		altq_etherclassify(&ifp->if_snd, m, &pktattr);
430 	crit_enter();
431 	if (IPFW_LOADED && ether_ipfw != 0) {
432 		struct ether_header save_eh, *eh;
433 
434 		eh = mtod(m, struct ether_header *);
435 		save_eh = *eh;
436 		m_adj(m, ETHER_HDR_LEN);
437 		if (!ether_ipfw_chk(&m, ifp, &rule, eh)) {
438 			crit_exit();
439 			if (m != NULL) {
440 				m_freem(m);
441 				return ENOBUFS; /* pkt dropped */
442 			} else
443 				return 0;	/* consumed e.g. in a pipe */
444 		}
445 
446 		/* packet was ok, restore the ethernet header */
447 		ether_restore_header(&m, eh, &save_eh);
448 		if (m == NULL) {
449 			crit_exit();
450 			return ENOBUFS;
451 		}
452 	}
453 	crit_exit();
454 
455 	/*
456 	 * Queue message on interface, update output statistics if
457 	 * successful, and start output if interface not yet active.
458 	 */
459 	error = ifq_dispatch(ifp, m, &pktattr);
460 	return (error);
461 }
462 
463 /*
464  * ipfw processing for ethernet packets (in and out).
465  * The second parameter is NULL from ether_demux(), and ifp from
466  * ether_output_frame().
467  */
468 static boolean_t
469 ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, struct ip_fw **rule,
470 	       const struct ether_header *eh)
471 {
472 	struct ether_header save_eh = *eh;	/* might be a ptr in *m0 */
473 	struct ip_fw_args args;
474 	struct m_tag *mtag;
475 	struct mbuf *m;
476 	int i;
477 
478 	if (*rule != NULL && fw_one_pass)
479 		return TRUE; /* dummynet packet, already partially processed */
480 
481 	/*
482 	 * I need some amount of data to be contiguous.
483 	 */
484 	i = min((*m0)->m_pkthdr.len, max_protohdr);
485 	if ((*m0)->m_len < i) {
486 		*m0 = m_pullup(*m0, i);
487 		if (*m0 == NULL)
488 			return FALSE;
489 	}
490 
491 	/*
492 	 * Clean up tags
493 	 */
494 	if ((mtag = m_tag_find(*m0, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL)
495 		m_tag_delete(*m0, mtag);
496 	if ((*m0)->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED) {
497 		mtag = m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL);
498 		KKASSERT(mtag != NULL);
499 		m_tag_delete(*m0, mtag);
500 		(*m0)->m_pkthdr.fw_flags &= ~IPFORWARD_MBUF_TAGGED;
501 	}
502 
503 	args.m = *m0;		/* the packet we are looking at		*/
504 	args.oif = dst;		/* destination, if any			*/
505 	args.rule = *rule;	/* matching rule to restart		*/
506 	args.eh = &save_eh;	/* MAC header for bridged/MAC packets	*/
507 	i = ip_fw_chk_ptr(&args);
508 	*m0 = args.m;
509 	*rule = args.rule;
510 
511 	if (*m0 == NULL)
512 		return FALSE;
513 
514 	switch (i) {
515 	case IP_FW_PASS:
516 		return TRUE;
517 
518 	case IP_FW_DIVERT:
519 	case IP_FW_TEE:
520 	case IP_FW_DENY:
521 		/*
522 		 * XXX at some point add support for divert/forward actions.
523 		 * If none of the above matches, we have to drop the pkt.
524 		 */
525 		return FALSE;
526 
527 	case IP_FW_DUMMYNET:
528 		/*
529 		 * Pass the pkt to dummynet, which consumes it.
530 		 */
531 		m = *m0;	/* pass the original to dummynet */
532 		*m0 = NULL;	/* and nothing back to the caller */
533 
534 		ether_restore_header(&m, eh, &save_eh);
535 		if (m == NULL)
536 			return FALSE;
537 
538 		ip_fw_dn_io_ptr(m, args.cookie,
539 				dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args);
540 		ip_dn_queue(m);
541 		return FALSE;
542 
543 	default:
544 		panic("unknown ipfw return value: %d", i);
545 	}
546 }
547 
548 static void
549 ether_input(struct ifnet *ifp, struct mbuf *m)
550 {
551 	ether_input_pkt(ifp, m, NULL);
552 }
553 
554 /*
555  * Perform common duties while attaching to interface list
556  */
557 void
558 ether_ifattach(struct ifnet *ifp, uint8_t *lla, lwkt_serialize_t serializer)
559 {
560 	ether_ifattach_bpf(ifp, lla, DLT_EN10MB, sizeof(struct ether_header),
561 			   serializer);
562 }
563 
564 void
565 ether_ifattach_bpf(struct ifnet *ifp, uint8_t *lla, u_int dlt, u_int hdrlen,
566 		   lwkt_serialize_t serializer)
567 {
568 	struct sockaddr_dl *sdl;
569 
570 	ifp->if_type = IFT_ETHER;
571 	ifp->if_addrlen = ETHER_ADDR_LEN;
572 	ifp->if_hdrlen = ETHER_HDR_LEN;
573 	if_attach(ifp, serializer);
574 	ifp->if_mtu = ETHERMTU;
575 	if (ifp->if_baudrate == 0)
576 		ifp->if_baudrate = 10000000;
577 	ifp->if_output = ether_output;
578 	ifp->if_input = ether_input;
579 	ifp->if_resolvemulti = ether_resolvemulti;
580 	ifp->if_broadcastaddr = etherbroadcastaddr;
581 	sdl = IF_LLSOCKADDR(ifp);
582 	sdl->sdl_type = IFT_ETHER;
583 	sdl->sdl_alen = ifp->if_addrlen;
584 	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
585 	/*
586 	 * XXX Keep the current drivers happy.
587 	 * XXX Remove once all drivers have been cleaned up
588 	 */
589 	if (lla != IFP2AC(ifp)->ac_enaddr)
590 		bcopy(lla, IFP2AC(ifp)->ac_enaddr, ifp->if_addrlen);
591 	bpfattach(ifp, dlt, hdrlen);
592 	if (ng_ether_attach_p != NULL)
593 		(*ng_ether_attach_p)(ifp);
594 
595 	if_printf(ifp, "MAC address: %6D\n", lla, ":");
596 }
597 
598 /*
599  * Perform common duties while detaching an Ethernet interface
600  */
601 void
602 ether_ifdetach(struct ifnet *ifp)
603 {
604 	if_down(ifp);
605 
606 	if (ng_ether_detach_p != NULL)
607 		(*ng_ether_detach_p)(ifp);
608 	bpfdetach(ifp);
609 	if_detach(ifp);
610 }
611 
612 int
613 ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
614 {
615 	struct ifaddr *ifa = (struct ifaddr *) data;
616 	struct ifreq *ifr = (struct ifreq *) data;
617 	int error = 0;
618 
619 #define IF_INIT(ifp) \
620 do { \
621 	if (((ifp)->if_flags & IFF_UP) == 0) { \
622 		(ifp)->if_flags |= IFF_UP; \
623 		(ifp)->if_init((ifp)->if_softc); \
624 	} \
625 } while (0)
626 
627 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
628 
629 	switch (command) {
630 	case SIOCSIFADDR:
631 		switch (ifa->ifa_addr->sa_family) {
632 #ifdef INET
633 		case AF_INET:
634 			IF_INIT(ifp);	/* before arpwhohas */
635 			arp_ifinit(ifp, ifa);
636 			break;
637 #endif
638 #ifdef IPX
639 		/*
640 		 * XXX - This code is probably wrong
641 		 */
642 		case AF_IPX:
643 			{
644 			struct ipx_addr *ina = &IA_SIPX(ifa)->sipx_addr;
645 			struct arpcom *ac = IFP2AC(ifp);
646 
647 			if (ipx_nullhost(*ina))
648 				ina->x_host = *(union ipx_host *) ac->ac_enaddr;
649 			else
650 				bcopy(ina->x_host.c_host, ac->ac_enaddr,
651 				      sizeof ac->ac_enaddr);
652 
653 			IF_INIT(ifp);	/* Set new address. */
654 			break;
655 			}
656 #endif
657 		default:
658 			IF_INIT(ifp);
659 			break;
660 		}
661 		break;
662 
663 	case SIOCGIFADDR:
664 		bcopy(IFP2AC(ifp)->ac_enaddr,
665 		      ((struct sockaddr *)ifr->ifr_data)->sa_data,
666 		      ETHER_ADDR_LEN);
667 		break;
668 
669 	case SIOCSIFMTU:
670 		/*
671 		 * Set the interface MTU.
672 		 */
673 		if (ifr->ifr_mtu > ETHERMTU) {
674 			error = EINVAL;
675 		} else {
676 			ifp->if_mtu = ifr->ifr_mtu;
677 		}
678 		break;
679 	default:
680 		error = EINVAL;
681 		break;
682 	}
683 	return (error);
684 
685 #undef IF_INIT
686 }
687 
688 int
689 ether_resolvemulti(
690 	struct ifnet *ifp,
691 	struct sockaddr **llsa,
692 	struct sockaddr *sa)
693 {
694 	struct sockaddr_dl *sdl;
695 #ifdef INET
696 	struct sockaddr_in *sin;
697 #endif
698 #ifdef INET6
699 	struct sockaddr_in6 *sin6;
700 #endif
701 	u_char *e_addr;
702 
703 	switch(sa->sa_family) {
704 	case AF_LINK:
705 		/*
706 		 * No mapping needed. Just check that it's a valid MC address.
707 		 */
708 		sdl = (struct sockaddr_dl *)sa;
709 		e_addr = LLADDR(sdl);
710 		if ((e_addr[0] & 1) != 1)
711 			return EADDRNOTAVAIL;
712 		*llsa = NULL;
713 		return 0;
714 
715 #ifdef INET
716 	case AF_INET:
717 		sin = (struct sockaddr_in *)sa;
718 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
719 			return EADDRNOTAVAIL;
720 		sdl = kmalloc(sizeof *sdl, M_IFMADDR, M_WAITOK | M_ZERO);
721 		sdl->sdl_len = sizeof *sdl;
722 		sdl->sdl_family = AF_LINK;
723 		sdl->sdl_index = ifp->if_index;
724 		sdl->sdl_type = IFT_ETHER;
725 		sdl->sdl_alen = ETHER_ADDR_LEN;
726 		e_addr = LLADDR(sdl);
727 		ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
728 		*llsa = (struct sockaddr *)sdl;
729 		return 0;
730 #endif
731 #ifdef INET6
732 	case AF_INET6:
733 		sin6 = (struct sockaddr_in6 *)sa;
734 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
735 			/*
736 			 * An IP6 address of 0 means listen to all
737 			 * of the Ethernet multicast address used for IP6.
738 			 * (This is used for multicast routers.)
739 			 */
740 			ifp->if_flags |= IFF_ALLMULTI;
741 			*llsa = NULL;
742 			return 0;
743 		}
744 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
745 			return EADDRNOTAVAIL;
746 		sdl = kmalloc(sizeof *sdl, M_IFMADDR, M_WAITOK | M_ZERO);
747 		sdl->sdl_len = sizeof *sdl;
748 		sdl->sdl_family = AF_LINK;
749 		sdl->sdl_index = ifp->if_index;
750 		sdl->sdl_type = IFT_ETHER;
751 		sdl->sdl_alen = ETHER_ADDR_LEN;
752 		e_addr = LLADDR(sdl);
753 		ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
754 		*llsa = (struct sockaddr *)sdl;
755 		return 0;
756 #endif
757 
758 	default:
759 		/*
760 		 * Well, the text isn't quite right, but it's the name
761 		 * that counts...
762 		 */
763 		return EAFNOSUPPORT;
764 	}
765 }
766 
767 #if 0
768 /*
769  * This is for reference.  We have a table-driven version
770  * of the little-endian crc32 generator, which is faster
771  * than the double-loop.
772  */
773 uint32_t
774 ether_crc32_le(const uint8_t *buf, size_t len)
775 {
776 	uint32_t c, crc, carry;
777 	size_t i, j;
778 
779 	crc = 0xffffffffU;	/* initial value */
780 
781 	for (i = 0; i < len; i++) {
782 		c = buf[i];
783 		for (j = 0; j < 8; j++) {
784 			carry = ((crc & 0x01) ? 1 : 0) ^ (c & 0x01);
785 			crc >>= 1;
786 			c >>= 1;
787 			if (carry)
788 				crc = (crc ^ ETHER_CRC_POLY_LE);
789 		}
790 	}
791 
792 	return (crc);
793 }
794 #else
795 uint32_t
796 ether_crc32_le(const uint8_t *buf, size_t len)
797 {
798 	static const uint32_t crctab[] = {
799 		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
800 		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
801 		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
802 		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
803 	};
804 	uint32_t crc;
805 	size_t i;
806 
807 	crc = 0xffffffffU;	/* initial value */
808 
809 	for (i = 0; i < len; i++) {
810 		crc ^= buf[i];
811 		crc = (crc >> 4) ^ crctab[crc & 0xf];
812 		crc = (crc >> 4) ^ crctab[crc & 0xf];
813 	}
814 
815 	return (crc);
816 }
817 #endif
818 
819 uint32_t
820 ether_crc32_be(const uint8_t *buf, size_t len)
821 {
822 	uint32_t c, crc, carry;
823 	size_t i, j;
824 
825 	crc = 0xffffffffU;	/* initial value */
826 
827 	for (i = 0; i < len; i++) {
828 		c = buf[i];
829 		for (j = 0; j < 8; j++) {
830 			carry = ((crc & 0x80000000U) ? 1 : 0) ^ (c & 0x01);
831 			crc <<= 1;
832 			c >>= 1;
833 			if (carry)
834 				crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
835 		}
836 	}
837 
838 	return (crc);
839 }
840 
841 /*
842  * find the size of ethernet header, and call classifier
843  */
844 void
845 altq_etherclassify(struct ifaltq *ifq, struct mbuf *m,
846 		   struct altq_pktattr *pktattr)
847 {
848 	struct ether_header *eh;
849 	uint16_t ether_type;
850 	int hlen, af, hdrsize;
851 	caddr_t hdr;
852 
853 	hlen = sizeof(struct ether_header);
854 	eh = mtod(m, struct ether_header *);
855 
856 	ether_type = ntohs(eh->ether_type);
857 	if (ether_type < ETHERMTU) {
858 		/* ick! LLC/SNAP */
859 		struct llc *llc = (struct llc *)(eh + 1);
860 		hlen += 8;
861 
862 		if (m->m_len < hlen ||
863 		    llc->llc_dsap != LLC_SNAP_LSAP ||
864 		    llc->llc_ssap != LLC_SNAP_LSAP ||
865 		    llc->llc_control != LLC_UI)
866 			goto bad;  /* not snap! */
867 
868 		ether_type = ntohs(llc->llc_un.type_snap.ether_type);
869 	}
870 
871 	if (ether_type == ETHERTYPE_IP) {
872 		af = AF_INET;
873 		hdrsize = 20;  /* sizeof(struct ip) */
874 #ifdef INET6
875 	} else if (ether_type == ETHERTYPE_IPV6) {
876 		af = AF_INET6;
877 		hdrsize = 40;  /* sizeof(struct ip6_hdr) */
878 #endif
879 	} else
880 		goto bad;
881 
882 	while (m->m_len <= hlen) {
883 		hlen -= m->m_len;
884 		m = m->m_next;
885 	}
886 	hdr = m->m_data + hlen;
887 	if (m->m_len < hlen + hdrsize) {
888 		/*
889 		 * ip header is not in a single mbuf.  this should not
890 		 * happen in the current code.
891 		 * (todo: use m_pulldown in the future)
892 		 */
893 		goto bad;
894 	}
895 	m->m_data += hlen;
896 	m->m_len -= hlen;
897 	ifq_classify(ifq, m, af, pktattr);
898 	m->m_data -= hlen;
899 	m->m_len += hlen;
900 
901 	return;
902 
903 bad:
904 	pktattr->pattr_class = NULL;
905 	pktattr->pattr_hdr = NULL;
906 	pktattr->pattr_af = AF_UNSPEC;
907 }
908 
909 static void
910 ether_restore_header(struct mbuf **m0, const struct ether_header *eh,
911 		     const struct ether_header *save_eh)
912 {
913 	struct mbuf *m = *m0;
914 
915 	ether_restore_hdr++;
916 
917 	/*
918 	 * Prepend the header, optimize for the common case of
919 	 * eh pointing into the mbuf.
920 	 */
921 	if ((const void *)(eh + 1) == (void *)m->m_data) {
922 		m->m_data -= ETHER_HDR_LEN;
923 		m->m_len += ETHER_HDR_LEN;
924 		m->m_pkthdr.len += ETHER_HDR_LEN;
925 	} else {
926 		ether_prepend_hdr++;
927 
928 		M_PREPEND(m, ETHER_HDR_LEN, MB_DONTWAIT);
929 		if (m != NULL) {
930 			bcopy(save_eh, mtod(m, struct ether_header *),
931 			      ETHER_HDR_LEN);
932 		}
933 	}
934 	*m0 = m;
935 }
936 
937 /*
938  * Upper layer processing for a received Ethernet packet.
939  */
940 void
941 ether_demux_oncpu(struct ifnet *ifp, struct mbuf *m)
942 {
943 	struct ether_header *eh;
944 	int isr, discard = 0;
945 	u_short ether_type;
946 	struct ip_fw *rule = NULL;
947 
948 	M_ASSERTPKTHDR(m);
949 	KASSERT(m->m_len >= ETHER_HDR_LEN,
950 		("ether header is not contiguous!"));
951 
952 	eh = mtod(m, struct ether_header *);
953 
954 	if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
955 		struct m_tag *mtag;
956 
957 		/* Extract info from dummynet tag */
958 		mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
959 		KKASSERT(mtag != NULL);
960 		rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
961 		KKASSERT(rule != NULL);
962 
963 		m_tag_delete(m, mtag);
964 		m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
965 
966 		/* packet is passing the second time */
967 		goto post_stats;
968 	}
969 
970 	/*
971 	 * We got a packet which was unicast to a different Ethernet
972 	 * address.  If the driver is working properly, then this
973 	 * situation can only happen when the interface is in
974 	 * promiscuous mode.  We defer the packet discarding until the
975 	 * vlan processing is done, so that vlan/bridge or vlan/netgraph
976 	 * could work.
977 	 */
978 	if (((ifp->if_flags & (IFF_PROMISC | IFF_PPROMISC)) == IFF_PROMISC) &&
979 	    !ETHER_IS_MULTICAST(eh->ether_dhost) &&
980 	    bcmp(eh->ether_dhost, IFP2AC(ifp)->ac_enaddr, ETHER_ADDR_LEN)) {
981 		if (ether_debug & 1) {
982 			kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
983 				"%02x:%02x:%02x:%02x:%02x:%02x "
984 				"%04x vs %02x:%02x:%02x:%02x:%02x:%02x\n",
985 				eh->ether_dhost[0],
986 				eh->ether_dhost[1],
987 				eh->ether_dhost[2],
988 				eh->ether_dhost[3],
989 				eh->ether_dhost[4],
990 				eh->ether_dhost[5],
991 				eh->ether_shost[0],
992 				eh->ether_shost[1],
993 				eh->ether_shost[2],
994 				eh->ether_shost[3],
995 				eh->ether_shost[4],
996 				eh->ether_shost[5],
997 				eh->ether_type,
998 				((u_char *)IFP2AC(ifp)->ac_enaddr)[0],
999 				((u_char *)IFP2AC(ifp)->ac_enaddr)[1],
1000 				((u_char *)IFP2AC(ifp)->ac_enaddr)[2],
1001 				((u_char *)IFP2AC(ifp)->ac_enaddr)[3],
1002 				((u_char *)IFP2AC(ifp)->ac_enaddr)[4],
1003 				((u_char *)IFP2AC(ifp)->ac_enaddr)[5]
1004 			);
1005 		}
1006 		if ((ether_debug & 2) == 0)
1007 			discard = 1;
1008 	}
1009 
1010 post_stats:
1011 	if (IPFW_LOADED && ether_ipfw != 0 && !discard) {
1012 		struct ether_header save_eh = *eh;
1013 
1014 		/* XXX old crufty stuff, needs to be removed */
1015 		m_adj(m, sizeof(struct ether_header));
1016 
1017 		if (!ether_ipfw_chk(&m, NULL, &rule, eh)) {
1018 			m_freem(m);
1019 			return;
1020 		}
1021 
1022 		ether_restore_header(&m, eh, &save_eh);
1023 		if (m == NULL)
1024 			return;
1025 		eh = mtod(m, struct ether_header *);
1026 	}
1027 
1028 	ether_type = ntohs(eh->ether_type);
1029 	KKASSERT(ether_type != ETHERTYPE_VLAN);
1030 
1031 	if (m->m_flags & M_VLANTAG) {
1032 		void (*vlan_input_func)(struct mbuf *);
1033 
1034 		vlan_input_func = vlan_input_p;
1035 		if (vlan_input_func != NULL) {
1036 			vlan_input_func(m);
1037 		} else {
1038 			m->m_pkthdr.rcvif->if_noproto++;
1039 			m_freem(m);
1040 		}
1041 		return;
1042 	}
1043 
1044 	/*
1045 	 * If we have been asked to discard this packet
1046 	 * (e.g. not for us), drop it before entering
1047 	 * the upper layer.
1048 	 */
1049 	if (discard) {
1050 		m_freem(m);
1051 		return;
1052 	}
1053 
1054 	/*
1055 	 * Clear protocol specific flags,
1056 	 * before entering the upper layer.
1057 	 */
1058 	m->m_flags &= ~M_ETHER_FLAGS;
1059 
1060 	/* Strip ethernet header. */
1061 	m_adj(m, sizeof(struct ether_header));
1062 
1063 	switch (ether_type) {
1064 #ifdef INET
1065 	case ETHERTYPE_IP:
1066 		if ((m->m_flags & M_LENCHECKED) == 0) {
1067 			if (!ip_lengthcheck(&m, 0))
1068 				return;
1069 		}
1070 		if (ipflow_fastforward(m))
1071 			return;
1072 		isr = NETISR_IP;
1073 		break;
1074 
1075 	case ETHERTYPE_ARP:
1076 		if (ifp->if_flags & IFF_NOARP) {
1077 			/* Discard packet if ARP is disabled on interface */
1078 			m_freem(m);
1079 			return;
1080 		}
1081 		isr = NETISR_ARP;
1082 		break;
1083 #endif
1084 
1085 #ifdef INET6
1086 	case ETHERTYPE_IPV6:
1087 		isr = NETISR_IPV6;
1088 		break;
1089 #endif
1090 
1091 #ifdef IPX
1092 	case ETHERTYPE_IPX:
1093 		if (ef_inputp) {
1094 			/*
1095 			 * Hold BGL and recheck ef_inputp
1096 			 */
1097 			get_mplock();
1098 			if (ef_inputp && ef_inputp(ifp, eh, m) == 0) {
1099 				rel_mplock();
1100 				return;
1101 			}
1102 			rel_mplock();
1103 		}
1104 		isr = NETISR_IPX;
1105 		break;
1106 #endif
1107 
1108 #ifdef MPLS
1109 	case ETHERTYPE_MPLS:
1110 	case ETHERTYPE_MPLS_MCAST:
1111 		/* Should have been set by ether_input_pkt(). */
1112 		KKASSERT(m->m_flags & M_MPLSLABELED);
1113 		isr = NETISR_MPLS;
1114 		break;
1115 #endif
1116 
1117 	default:
1118 		/*
1119 		 * The accurate msgport is not determined before
1120 		 * we reach here, so recharacterize packet.
1121 		 */
1122 		m->m_flags &= ~M_HASH;
1123 #ifdef IPX
1124 		if (ef_inputp) {
1125 			/*
1126 			 * Hold BGL and recheck ef_inputp
1127 			 */
1128 			get_mplock();
1129 			if (ef_inputp && ef_inputp(ifp, eh, m) == 0) {
1130 				rel_mplock();
1131 				return;
1132 			}
1133 			rel_mplock();
1134 		}
1135 #endif
1136 		if (ng_ether_input_orphan_p != NULL) {
1137 			/*
1138 			 * Put back the ethernet header so netgraph has a
1139 			 * consistent view of inbound packets.
1140 			 */
1141 			M_PREPEND(m, ETHER_HDR_LEN, MB_DONTWAIT);
1142 			if (m == NULL) {
1143 				/*
1144 				 * M_PREPEND frees the mbuf in case of failure.
1145 				 */
1146 				return;
1147 			}
1148 			/*
1149 			 * Hold BGL and recheck ng_ether_input_orphan_p
1150 			 */
1151 			get_mplock();
1152 			if (ng_ether_input_orphan_p != NULL) {
1153 				ng_ether_input_orphan_p(ifp, m);
1154 				rel_mplock();
1155 				return;
1156 			}
1157 			rel_mplock();
1158 		}
1159 		m_freem(m);
1160 		return;
1161 	}
1162 
1163 	if (m->m_flags & M_HASH) {
1164 		if (&curthread->td_msgport == cpu_portfn(m->m_pkthdr.hash)) {
1165 			netisr_handle(isr, m);
1166 			return;
1167 		} else {
1168 			/*
1169 			 * XXX Something is wrong,
1170 			 * we probably should panic here!
1171 			 */
1172 			m->m_flags &= ~M_HASH;
1173 			atomic_add_long(&ether_input_wronghash, 1);
1174 		}
1175 	}
1176 #ifdef RSS_DEBUG
1177 	atomic_add_long(&ether_input_requeue, 1);
1178 #endif
1179 	netisr_queue(isr, m);
1180 }
1181 
1182 /*
1183  * First we perform any link layer operations, then continue to the
1184  * upper layers with ether_demux_oncpu().
1185  */
1186 static void
1187 ether_input_oncpu(struct ifnet *ifp, struct mbuf *m)
1188 {
1189 #ifdef CARP
1190 	void *carp;
1191 #endif
1192 
1193 	if ((ifp->if_flags & (IFF_UP | IFF_MONITOR)) != IFF_UP) {
1194 		/*
1195 		 * Receiving interface's flags are changed, when this
1196 		 * packet is waiting for processing; discard it.
1197 		 */
1198 		m_freem(m);
1199 		return;
1200 	}
1201 
1202 	/*
1203 	 * Tap the packet off here for a bridge.  bridge_input()
1204 	 * will return NULL if it has consumed the packet, otherwise
1205 	 * it gets processed as normal.  Note that bridge_input()
1206 	 * will always return the original packet if we need to
1207 	 * process it locally.
1208 	 */
1209 	if (ifp->if_bridge) {
1210 		KASSERT(bridge_input_p != NULL,
1211 			("%s: if_bridge not loaded!", __func__));
1212 
1213 		if(m->m_flags & M_ETHER_BRIDGED) {
1214 			m->m_flags &= ~M_ETHER_BRIDGED;
1215 		} else {
1216 			m = bridge_input_p(ifp, m);
1217 			if (m == NULL)
1218 				return;
1219 
1220 			KASSERT(ifp == m->m_pkthdr.rcvif,
1221 				("bridge_input_p changed rcvif"));
1222 		}
1223 	}
1224 
1225 #ifdef CARP
1226 	carp = ifp->if_carp;
1227 	if (carp) {
1228 		m = carp_input(carp, m);
1229 		if (m == NULL)
1230 			return;
1231 		KASSERT(ifp == m->m_pkthdr.rcvif,
1232 		    ("carp_input changed rcvif"));
1233 	}
1234 #endif
1235 
1236 	/* Handle ng_ether(4) processing, if any */
1237 	if (ng_ether_input_p != NULL) {
1238 		/*
1239 		 * Hold BGL and recheck ng_ether_input_p
1240 		 */
1241 		get_mplock();
1242 		if (ng_ether_input_p != NULL)
1243 			ng_ether_input_p(ifp, &m);
1244 		rel_mplock();
1245 
1246 		if (m == NULL)
1247 			return;
1248 	}
1249 
1250 	/* Continue with upper layer processing */
1251 	ether_demux_oncpu(ifp, m);
1252 }
1253 
1254 /*
1255  * Perform certain functions of ether_input_pkt():
1256  * - Test IFF_UP
1257  * - Update statistics
1258  * - Run bpf(4) tap if requested
1259  * Then pass the packet to ether_input_oncpu().
1260  *
1261  * This function should be used by pseudo interface (e.g. vlan(4)),
1262  * when it tries to claim that the packet is received by it.
1263  *
1264  * REINPUT_KEEPRCVIF
1265  * REINPUT_RUNBPF
1266  */
1267 void
1268 ether_reinput_oncpu(struct ifnet *ifp, struct mbuf *m, int reinput_flags)
1269 {
1270 	/* Discard packet if interface is not up */
1271 	if (!(ifp->if_flags & IFF_UP)) {
1272 		m_freem(m);
1273 		return;
1274 	}
1275 
1276 	/*
1277 	 * Change receiving interface.  The bridge will often pass a flag to
1278 	 * ask that this not be done so ARPs get applied to the correct
1279 	 * side.
1280 	 */
1281 	if ((reinput_flags & REINPUT_KEEPRCVIF) == 0 ||
1282 	    m->m_pkthdr.rcvif == NULL) {
1283 		m->m_pkthdr.rcvif = ifp;
1284 	}
1285 
1286 	/* Update statistics */
1287 	ifp->if_ipackets++;
1288 	ifp->if_ibytes += m->m_pkthdr.len;
1289 	if (m->m_flags & (M_MCAST | M_BCAST))
1290 		ifp->if_imcasts++;
1291 
1292 	if (reinput_flags & REINPUT_RUNBPF)
1293 		BPF_MTAP(ifp, m);
1294 
1295 	ether_input_oncpu(ifp, m);
1296 }
1297 
1298 static __inline boolean_t
1299 ether_vlancheck(struct mbuf **m0)
1300 {
1301 	struct mbuf *m = *m0;
1302 	struct ether_header *eh;
1303 	uint16_t ether_type;
1304 
1305 	eh = mtod(m, struct ether_header *);
1306 	ether_type = ntohs(eh->ether_type);
1307 
1308 	if (ether_type == ETHERTYPE_VLAN && (m->m_flags & M_VLANTAG) == 0) {
1309 		/*
1310 		 * Extract vlan tag if hardware does not do it for us
1311 		 */
1312 		vlan_ether_decap(&m);
1313 		if (m == NULL)
1314 			goto failed;
1315 
1316 		eh = mtod(m, struct ether_header *);
1317 		ether_type = ntohs(eh->ether_type);
1318 	}
1319 
1320 	if (ether_type == ETHERTYPE_VLAN && (m->m_flags & M_VLANTAG)) {
1321 		/*
1322 		 * To prevent possible dangerous recursion,
1323 		 * we don't do vlan-in-vlan
1324 		 */
1325 		m->m_pkthdr.rcvif->if_noproto++;
1326 		goto failed;
1327 	}
1328 	KKASSERT(ether_type != ETHERTYPE_VLAN);
1329 
1330 	m->m_flags |= M_ETHER_VLANCHECKED;
1331 	*m0 = m;
1332 	return TRUE;
1333 failed:
1334 	if (m != NULL)
1335 		m_freem(m);
1336 	*m0 = NULL;
1337 	return FALSE;
1338 }
1339 
1340 static void
1341 ether_input_handler(netmsg_t nmsg)
1342 {
1343 	struct netmsg_packet *nmp = &nmsg->packet;	/* actual size */
1344 	struct ether_header *eh;
1345 	struct ifnet *ifp;
1346 	struct mbuf *m;
1347 
1348 	m = nmp->nm_packet;
1349 	M_ASSERTPKTHDR(m);
1350 	ifp = m->m_pkthdr.rcvif;
1351 
1352 	eh = mtod(m, struct ether_header *);
1353 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1354 		if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
1355 			 ifp->if_addrlen) == 0)
1356 			m->m_flags |= M_BCAST;
1357 		else
1358 			m->m_flags |= M_MCAST;
1359 		ifp->if_imcasts++;
1360 	}
1361 
1362 	if ((m->m_flags & M_ETHER_VLANCHECKED) == 0) {
1363 		if (!ether_vlancheck(&m)) {
1364 			KKASSERT(m == NULL);
1365 			return;
1366 		}
1367 	}
1368 
1369 	ether_input_oncpu(ifp, m);
1370 }
1371 
1372 /*
1373  * Send the packet to the target msgport
1374  *
1375  * At this point the packet had better be characterized (M_HASH set),
1376  * so we know which cpu to send it to.
1377  */
1378 static void
1379 ether_dispatch(int isr, struct mbuf *m)
1380 {
1381 	struct netmsg_packet *pmsg;
1382 
1383 	KKASSERT(m->m_flags & M_HASH);
1384 	pmsg = &m->m_hdr.mh_netmsg;
1385 	netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
1386 		    0, ether_input_handler);
1387 	pmsg->nm_packet = m;
1388 	pmsg->base.lmsg.u.ms_result = isr;
1389 
1390 	logether(disp_beg, NULL);
1391 	lwkt_sendmsg(cpu_portfn(m->m_pkthdr.hash), &pmsg->base.lmsg);
1392 	logether(disp_end, NULL);
1393 }
1394 
1395 /*
1396  * Process a received Ethernet packet.
1397  *
1398  * The ethernet header is assumed to be in the mbuf so the caller
1399  * MUST MAKE SURE that there are at least sizeof(struct ether_header)
1400  * bytes in the first mbuf.
1401  */
1402 void
1403 ether_input_pkt(struct ifnet *ifp, struct mbuf *m, const struct pktinfo *pi)
1404 {
1405 	int isr;
1406 
1407 	M_ASSERTPKTHDR(m);
1408 
1409 	/* Discard packet if interface is not up */
1410 	if (!(ifp->if_flags & IFF_UP)) {
1411 		m_freem(m);
1412 		return;
1413 	}
1414 
1415 	if (m->m_len < sizeof(struct ether_header)) {
1416 		/* XXX error in the caller. */
1417 		m_freem(m);
1418 		return;
1419 	}
1420 
1421 	m->m_pkthdr.rcvif = ifp;
1422 
1423 	logether(pkt_beg, ifp);
1424 
1425 	ETHER_BPF_MTAP(ifp, m);
1426 
1427 	ifp->if_ibytes += m->m_pkthdr.len;
1428 
1429 	if (ifp->if_flags & IFF_MONITOR) {
1430 		struct ether_header *eh;
1431 
1432 		eh = mtod(m, struct ether_header *);
1433 		if (ETHER_IS_MULTICAST(eh->ether_dhost))
1434 			ifp->if_imcasts++;
1435 
1436 		/*
1437 		 * Interface marked for monitoring; discard packet.
1438 		 */
1439 		m_freem(m);
1440 
1441 		logether(pkt_end, ifp);
1442 		return;
1443 	}
1444 
1445 	/*
1446 	 * If the packet has been characterized (pi->pi_netisr / M_HASH)
1447 	 * we can dispatch it immediately without further inspection.
1448 	 */
1449 	if (pi != NULL && (m->m_flags & M_HASH)) {
1450 #ifdef RSS_DEBUG
1451 		atomic_add_long(&ether_pktinfo_try, 1);
1452 #endif
1453 		netisr_hashcheck(pi->pi_netisr, m, pi);
1454 		if (m->m_flags & M_HASH) {
1455 			ether_dispatch(pi->pi_netisr, m);
1456 #ifdef RSS_DEBUG
1457 			atomic_add_long(&ether_pktinfo_hit, 1);
1458 #endif
1459 			logether(pkt_end, ifp);
1460 			return;
1461 		}
1462 	}
1463 #ifdef RSS_DEBUG
1464 	else if (ifp->if_capenable & IFCAP_RSS) {
1465 		if (pi == NULL)
1466 			atomic_add_long(&ether_rss_nopi, 1);
1467 		else
1468 			atomic_add_long(&ether_rss_nohash, 1);
1469 	}
1470 #endif
1471 
1472 	/*
1473 	 * Packet hash will be recalculated by software,
1474 	 * so clear the M_HASH flag set by the driver;
1475 	 * the hash value calculated by the hardware may
1476 	 * not be exactly what we want.
1477 	 */
1478 	m->m_flags &= ~M_HASH;
1479 
1480 	if (!ether_vlancheck(&m)) {
1481 		KKASSERT(m == NULL);
1482 		logether(pkt_end, ifp);
1483 		return;
1484 	}
1485 
1486 	isr = ether_characterize(&m);
1487 	if (m == NULL) {
1488 		logether(pkt_end, ifp);
1489 		return;
1490 	}
1491 
1492 	/*
1493 	 * Finally dispatch it
1494 	 */
1495 	ether_dispatch(isr, m);
1496 
1497 	logether(pkt_end, ifp);
1498 }
1499 
1500 static int
1501 ether_characterize(struct mbuf **m0)
1502 {
1503 	struct mbuf *m = *m0;
1504 	struct ether_header *eh;
1505 	uint16_t ether_type;
1506 	int isr;
1507 
1508 	eh = mtod(m, struct ether_header *);
1509 	ether_type = ntohs(eh->ether_type);
1510 
1511 	/*
1512 	 * Map ether type to netisr id.
1513 	 */
1514 	switch (ether_type) {
1515 #ifdef INET
1516 	case ETHERTYPE_IP:
1517 		isr = NETISR_IP;
1518 		break;
1519 
1520 	case ETHERTYPE_ARP:
1521 		isr = NETISR_ARP;
1522 		break;
1523 #endif
1524 
1525 #ifdef INET6
1526 	case ETHERTYPE_IPV6:
1527 		isr = NETISR_IPV6;
1528 		break;
1529 #endif
1530 
1531 #ifdef IPX
1532 	case ETHERTYPE_IPX:
1533 		isr = NETISR_IPX;
1534 		break;
1535 #endif
1536 
1537 #ifdef MPLS
1538 	case ETHERTYPE_MPLS:
1539 	case ETHERTYPE_MPLS_MCAST:
1540 		m->m_flags |= M_MPLSLABELED;
1541 		isr = NETISR_MPLS;
1542 		break;
1543 #endif
1544 
1545 	default:
1546 		/*
1547 		 * NETISR_MAX is an invalid value; it is chosen to let
1548 		 * netisr_characterize() know that we have no clear
1549 		 * idea where this packet should go.
1550 		 */
1551 		isr = NETISR_MAX;
1552 		break;
1553 	}
1554 
1555 	/*
1556 	 * Ask the isr to characterize the packet since we couldn't.
1557 	 * This is an attempt to optimally get us onto the correct protocol
1558 	 * thread.
1559 	 */
1560 	netisr_characterize(isr, &m, sizeof(struct ether_header));
1561 
1562 	*m0 = m;
1563 	return isr;
1564 }
1565 
1566 static void
1567 ether_demux_handler(netmsg_t nmsg)
1568 {
1569 	struct netmsg_packet *nmp = &nmsg->packet;	/* actual size */
1570 	struct ifnet *ifp;
1571 	struct mbuf *m;
1572 
1573 	m = nmp->nm_packet;
1574 	M_ASSERTPKTHDR(m);
1575 	ifp = m->m_pkthdr.rcvif;
1576 
1577 	ether_demux_oncpu(ifp, m);
1578 }
1579 
1580 void
1581 ether_demux(struct mbuf *m)
1582 {
1583 	struct netmsg_packet *pmsg;
1584 	int isr;
1585 
1586 	isr = ether_characterize(&m);
1587 	if (m == NULL)
1588 		return;
1589 
1590 	KKASSERT(m->m_flags & M_HASH);
1591 	pmsg = &m->m_hdr.mh_netmsg;
1592 	netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
1593 	    0, ether_demux_handler);
1594 	pmsg->nm_packet = m;
1595 	pmsg->base.lmsg.u.ms_result = isr;
1596 
1597 	lwkt_sendmsg(cpu_portfn(m->m_pkthdr.hash), &pmsg->base.lmsg);
1598 }
1599 
1600 boolean_t
1601 ether_tso_pullup(struct mbuf **mp, int *hoff0, struct ip **ip, int *iphlen,
1602     struct tcphdr **th, int *thoff)
1603 {
1604 	struct mbuf *m = *mp;
1605 	struct ether_header *eh;
1606 	uint16_t type;
1607 	int hoff;
1608 
1609 	KASSERT(M_WRITABLE(m), ("not writable"));
1610 
1611 	hoff = ETHER_HDR_LEN;
1612 	if (m->m_len < hoff) {
1613 		m = m_pullup(m, hoff);
1614 		if (m == NULL)
1615 			goto failed;
1616 	}
1617 	eh = mtod(m, struct ether_header *);
1618 	type = eh->ether_type;
1619 
1620 	if (type == htons(ETHERTYPE_VLAN)) {
1621 		struct ether_vlan_header *evh;
1622 
1623 		hoff += EVL_ENCAPLEN;
1624 		if (m->m_len < hoff) {
1625 			m = m_pullup(m, hoff);
1626 			if (m == NULL)
1627 				goto failed;
1628 		}
1629 		evh = mtod(m, struct ether_vlan_header *);
1630 		type = evh->evl_proto;
1631 	}
1632 	KASSERT(type == htons(ETHERTYPE_IP), ("not IP %d", ntohs(type)));
1633 
1634 	*mp = m;
1635 	*hoff0 = hoff;
1636 	return tcp_tso_pullup(mp, hoff, ip, iphlen, th, thoff);
1637 
1638 failed:
1639 	if (m != NULL)
1640 		m_freem(m);
1641 	*mp = NULL;
1642 	return FALSE;
1643 }
1644 
1645 MODULE_VERSION(ether, 1);
1646