xref: /netbsd-src/sys/dev/hyperv/if_hvn.c (revision 481d3881954fd794ca5f2d880b68c53a5db8620e)
1 /*	$NetBSD: if_hvn.c,v 1.28 2024/07/05 04:31:50 rin Exp $	*/
2 /*	$OpenBSD: if_hvn.c,v 1.39 2018/03/11 14:31:34 mikeb Exp $	*/
3 
4 /*-
5  * Copyright (c) 2009-2012,2016 Microsoft Corp.
6  * Copyright (c) 2010-2012 Citrix Inc.
7  * Copyright (c) 2012 NetApp Inc.
8  * Copyright (c) 2016 Mike Belopuhov <mike@esdenera.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice unmodified, this list of conditions, and the following
16  *    disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * The OpenBSD port was done under funding by Esdenera Networks GmbH.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: if_hvn.c,v 1.28 2024/07/05 04:31:50 rin Exp $");
39 
40 #ifdef _KERNEL_OPT
41 #include "opt_if_hvn.h"
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #endif
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/device.h>
50 #include <sys/bitops.h>
51 #include <sys/bus.h>
52 #include <sys/condvar.h>
53 #include <sys/cpu.h>
54 #include <sys/evcnt.h>
55 #include <sys/intr.h>
56 #include <sys/kmem.h>
57 #include <sys/kthread.h>
58 #include <sys/mutex.h>
59 #include <sys/pcq.h>
60 #include <sys/sysctl.h>
61 #include <sys/workqueue.h>
62 
63 #include <net/if.h>
64 #include <net/if_ether.h>
65 #include <net/if_media.h>
66 #include <net/if_vlanvar.h>
67 #include <net/rss_config.h>
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
71 #include <netinet/udp.h>
72 
73 #include <net/bpf.h>
74 
75 #include <dev/ic/ndisreg.h>
76 #include <dev/ic/rndisreg.h>
77 
78 #include <dev/hyperv/vmbusvar.h>
79 #include <dev/hyperv/if_hvnreg.h>
80 
81 #ifndef EVL_PRIO_BITS
82 #define EVL_PRIO_BITS	13
83 #endif
84 #ifndef EVL_CFI_BITS
85 #define EVL_CFI_BITS	12
86 #endif
87 
88 #define HVN_CHIM_SIZE			(15 * 1024 * 1024)
89 
90 #define HVN_NVS_MSGSIZE			32
91 #define HVN_NVS_BUFSIZE			PAGE_SIZE
92 
93 #define HVN_RING_BUFSIZE		(128 * PAGE_SIZE)
94 #define HVN_RING_IDX2CPU(sc, idx)	((idx) % ncpu)
95 
96 #ifndef HVN_CHANNEL_MAX_COUNT_DEFAULT
97 #define HVN_CHANNEL_MAX_COUNT_DEFAULT	8
98 #endif
99 
100 #ifndef HVN_LINK_STATE_CHANGE_DELAY
101 #define HVN_LINK_STATE_CHANGE_DELAY	5000
102 #endif
103 
104 #define HVN_WORKQUEUE_PRI		PRI_SOFTNET
105 
106 /*
107  * RNDIS control interface
108  */
109 #define HVN_RNDIS_CTLREQS		4
110 #define HVN_RNDIS_BUFSIZE		512
111 
112 struct rndis_cmd {
113 	uint32_t			rc_id;
114 	struct hvn_nvs_rndis		rc_msg;
115 	void				*rc_req;
116 	bus_dmamap_t			rc_dmap;
117 	bus_dma_segment_t		rc_segs;
118 	int				rc_nsegs;
119 	uint64_t			rc_gpa;
120 	struct rndis_packet_msg		rc_cmp;
121 	uint32_t			rc_cmplen;
122 	uint8_t				rc_cmpbuf[HVN_RNDIS_BUFSIZE];
123 	int				rc_done;
124 	TAILQ_ENTRY(rndis_cmd)		rc_entry;
125 	kmutex_t			rc_lock;
126 	kcondvar_t			rc_cv;
127 };
128 TAILQ_HEAD(rndis_queue, rndis_cmd);
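
/*
 * Added note (not in the original source): each RNDIS control request
 * cycles through the three rndis_queue lists kept in the softc: the
 * free queue (sc_cntl_fq) while idle, the submission queue (sc_cntl_sq)
 * while the command is outstanding on the channel, and the completion
 * queue (sc_cntl_cq) once the host has responded.
 */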
129 
130 #define HVN_MTU_MIN			68
131 #define HVN_MTU_MAX			(65535 - ETHER_ADDR_LEN)
132 
133 #define HVN_RNDIS_XFER_SIZE		2048
134 
135 #define HVN_NDIS_TXCSUM_CAP_IP4 \
136 	(NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT)
137 #define HVN_NDIS_TXCSUM_CAP_TCP4 \
138 	(NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT)
139 #define HVN_NDIS_TXCSUM_CAP_TCP6 \
140 	(NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \
141 	    NDIS_TXCSUM_CAP_IP6EXT)
142 #define HVN_NDIS_TXCSUM_CAP_UDP6 \
143 	(NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT)
144 #define HVN_NDIS_LSOV2_CAP_IP6 \
145 	(NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT)
146 
147 #define HVN_RNDIS_CMD_NORESP	__BIT(0)
148 
149 #define HVN_NVS_CMD_NORESP	__BIT(0)
150 
151 /*
152  * Tx ring
153  */
154 #define HVN_TX_DESC			512
155 #define HVN_TX_FRAGS			15		/* 31 is the max */
156 #define HVN_TX_FRAG_SIZE		PAGE_SIZE
157 #define HVN_TX_PKT_SIZE			16384
158 
159 #define HVN_RNDIS_PKT_LEN					\
160 	(sizeof(struct rndis_packet_msg) +			\
161 	 sizeof(struct rndis_pktinfo) + NDIS_VLAN_INFO_SIZE +	\
162 	 sizeof(struct rndis_pktinfo) + NDIS_TXCSUM_INFO_SIZE)
163 
164 #define HVN_PKTSIZE_MIN(align)						\
165 	roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN +	\
166 	HVN_RNDIS_PKT_LEN, (align))
167 #define HVN_PKTSIZE(m, align)						\
168 	roundup2((m)->m_pkthdr.len + HVN_RNDIS_PKT_LEN, (align))
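
/*
 * Added note: HVN_RNDIS_PKT_LEN reserves room for the RNDIS packet
 * header plus one pktinfo record each for VLAN and checksum metadata.
 * Illustrative example (assuming the host reports an 8-byte
 * aggregation alignment): a 1514-byte frame occupies
 * roundup2(1514 + HVN_RNDIS_PKT_LEN, 8) bytes of chimney space.
 */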
169 
170 struct hvn_tx_desc {
171 	uint32_t			txd_id;
172 	struct vmbus_gpa		txd_sgl[HVN_TX_FRAGS + 1];
173 	int				txd_nsge;
174 	struct mbuf			*txd_buf;
175 	bus_dmamap_t			txd_dmap;
176 	struct vmbus_gpa		txd_gpa;
177 	struct rndis_packet_msg		*txd_req;
178 	TAILQ_ENTRY(hvn_tx_desc)	txd_entry;
179 	u_int				txd_refs;
180 	uint32_t			txd_flags;
181 #define HVN_TXD_FLAG_ONAGG		__BIT(0)
182 #define HVN_TXD_FLAG_DMAMAP		__BIT(1)
183 	uint32_t			txd_chim_index;
184 	int				txd_chim_size;
185 	STAILQ_ENTRY(hvn_tx_desc)	txd_agg_entry;
186 	STAILQ_HEAD(, hvn_tx_desc)	txd_agg_list;
187 };
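
/*
 * Added note, inferred from the hvn_txd_agg()/hvn_txd_hold() usage
 * below: a descriptor that aggregates others links them on
 * txd_agg_list, and txd_refs keeps the descriptor alive until every
 * holder has released it through hvn_txd_put().
 */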
188 
189 struct hvn_softc;
190 struct hvn_rx_ring;
191 
192 struct hvn_tx_ring {
193 	struct hvn_softc		*txr_softc;
194 	struct vmbus_channel		*txr_chan;
195 	struct hvn_rx_ring		*txr_rxr;
196 	void				*txr_si;
197 	char				txr_name[16];
198 
199 	int				txr_id;
200 	int				txr_oactive;
201 	int				txr_suspended;
202 	int				txr_csum_assist;
203 	uint64_t			txr_caps_assist;
204 	uint32_t			txr_flags;
205 #define HVN_TXR_FLAG_UDP_HASH		__BIT(0)
206 
207 	struct evcnt			txr_evpkts;
208 	struct evcnt			txr_evsends;
209 	struct evcnt			txr_evnodesc;
210 	struct evcnt			txr_evdmafailed;
211 	struct evcnt			txr_evdefrag;
212 	struct evcnt			txr_evpcqdrop;
213 	struct evcnt			txr_evtransmitdefer;
214 	struct evcnt			txr_evflushfailed;
215 	struct evcnt			txr_evchimneytried;
216 	struct evcnt			txr_evchimney;
217 	struct evcnt			txr_evvlanfixup;
218 	struct evcnt			txr_evvlanhwtagging;
219 	struct evcnt			txr_evvlantap;
220 
221 	kmutex_t			txr_lock;
222 	pcq_t				*txr_interq;
223 
224 	uint32_t			txr_avail;
225 	TAILQ_HEAD(, hvn_tx_desc)	txr_list;
226 	struct hvn_tx_desc		txr_desc[HVN_TX_DESC];
227 	uint8_t				*txr_msgs;
228 	struct hyperv_dma		txr_dma;
229 
230 	int				txr_chim_size;
231 
232 	/* Applied packet transmission aggregation limits. */
233 	int				txr_agg_szmax;
234 	short				txr_agg_pktmax;
235 	short				txr_agg_align;
236 
237 	/* Packet transmission aggregation states. */
238 	struct hvn_tx_desc		*txr_agg_txd;
239 	int				txr_agg_szleft;
240 	short				txr_agg_pktleft;
241 	struct rndis_packet_msg		*txr_agg_prevpkt;
242 
243 	/* Temporary stats for each send. */
244 	int				txr_stat_pkts;
245 	int				txr_stat_size;
246 	int				txr_stat_mcasts;
247 
248 	int				(*txr_sendpkt)(struct hvn_tx_ring *,
249 					    struct hvn_tx_desc *);
250 } __aligned(CACHE_LINE_SIZE);
251 
252 struct hvn_rx_ring {
253 	struct hvn_softc		*rxr_softc;
254 	struct vmbus_channel		*rxr_chan;
255 	struct hvn_tx_ring		*rxr_txr;
256 	void				*rxr_si;
257 	bool				rxr_workqueue;
258 	char				rxr_name[16];
259 
260 	struct work			rxr_wk;
261 	volatile bool			rxr_onlist;
262 	volatile bool			rxr_onproc;
263 	kmutex_t			rxr_onwork_lock;
264 	kcondvar_t			rxr_onwork_cv;
265 
266 	uint32_t			rxr_flags;
267 #define HVN_RXR_FLAG_UDP_HASH		__BIT(0)
268 
269 	kmutex_t			rxr_lock;
270 
271 	struct evcnt			rxr_evpkts;
272 	struct evcnt			rxr_evcsum_ip;
273 	struct evcnt			rxr_evcsum_tcp;
274 	struct evcnt			rxr_evcsum_udp;
275 	struct evcnt			rxr_evvlanhwtagging;
276 	struct evcnt			rxr_evintr;
277 	struct evcnt			rxr_evdefer;
278 	struct evcnt			rxr_evdeferreq;
279 	struct evcnt			rxr_evredeferreq;
280 
281 	/* NVS */
282 	uint8_t				*rxr_nvsbuf;
283 } __aligned(CACHE_LINE_SIZE);
284 
285 struct hvn_softc {
286 	device_t			sc_dev;
287 
288 	struct vmbus_softc		*sc_vmbus;
289 	struct vmbus_channel		*sc_prichan;
290 	bus_dma_tag_t			sc_dmat;
291 
292 	struct ethercom			sc_ec;
293 	struct ifmedia			sc_media;
294 	struct if_percpuq		*sc_ipq;
295 	struct workqueue		*sc_wq;
296 	bool				sc_txrx_workqueue;
297 	kmutex_t			sc_core_lock;
298 
299 	kmutex_t			sc_link_lock;
300 	kcondvar_t			sc_link_cv;
301 	callout_t			sc_link_tmout;
302 	lwp_t				*sc_link_lwp;
303 	uint32_t			sc_link_ev;
304 #define HVN_LINK_EV_STATE_CHANGE	__BIT(0)
305 #define HVN_LINK_EV_NETWORK_CHANGE_TMOUT __BIT(1)
306 #define HVN_LINK_EV_NETWORK_CHANGE	__BIT(2)
307 #define HVN_LINK_EV_RESUME_NETWORK	__BIT(3)
308 #define HVN_LINK_EV_EXIT_THREAD		__BIT(4)
309 	int				sc_link_state;
310 	bool				sc_link_onproc;
311 	bool				sc_link_pending;
312 	bool				sc_link_suspend;
313 
314 	int				sc_tx_process_limit;
315 	int				sc_rx_process_limit;
316 	int				sc_tx_intr_process_limit;
317 	int				sc_rx_intr_process_limit;
318 
319 	struct sysctllog		*sc_sysctllog;
320 
321 	uint32_t			sc_caps;
322 #define HVN_CAPS_VLAN			__BIT(0)
323 #define HVN_CAPS_MTU			__BIT(1)
324 #define HVN_CAPS_IPCS			__BIT(2)
325 #define HVN_CAPS_TCP4CS			__BIT(3)
326 #define HVN_CAPS_TCP6CS			__BIT(4)
327 #define HVN_CAPS_UDP4CS			__BIT(5)
328 #define HVN_CAPS_UDP6CS			__BIT(6)
329 #define HVN_CAPS_TSO4			__BIT(7)
330 #define HVN_CAPS_TSO6			__BIT(8)
331 #define HVN_CAPS_HASHVAL		__BIT(9)
332 #define HVN_CAPS_UDPHASH		__BIT(10)
333 
334 	uint32_t			sc_flags;
335 #define HVN_SCF_ATTACHED		__BIT(0)
336 #define HVN_SCF_RXBUF_CONNECTED		__BIT(1)
337 #define HVN_SCF_CHIM_CONNECTED		__BIT(2)
338 #define HVN_SCF_REVOKED			__BIT(3)
339 #define HVN_SCF_HAS_RSSKEY		__BIT(4)
340 #define HVN_SCF_HAS_RSSIND		__BIT(5)
341 
342 	/* NVS protocol */
343 	int				sc_proto;
344 	uint32_t			sc_nvstid;
345 	uint8_t				sc_nvsrsp[HVN_NVS_MSGSIZE];
346 	int				sc_nvsdone;
347 	kmutex_t			sc_nvsrsp_lock;
348 	kcondvar_t			sc_nvsrsp_cv;
349 
350 	/* RNDIS protocol */
351 	int				sc_ndisver;
352 	uint32_t			sc_rndisrid;
353 	int				sc_tso_szmax;
354 	int				sc_tso_sgmin;
355 	uint32_t			sc_rndis_agg_size;
356 	uint32_t			sc_rndis_agg_pkts;
357 	uint32_t			sc_rndis_agg_align;
358 	struct rndis_queue		sc_cntl_sq; /* submission queue */
359 	kmutex_t			sc_cntl_sqlck;
360 	struct rndis_queue		sc_cntl_cq; /* completion queue */
361 	kmutex_t			sc_cntl_cqlck;
362 	struct rndis_queue		sc_cntl_fq; /* free queue */
363 	kmutex_t			sc_cntl_fqlck;
364 	kcondvar_t			sc_cntl_fqcv;
365 	struct rndis_cmd		sc_cntl_msgs[HVN_RNDIS_CTLREQS];
366 	struct hvn_nvs_rndis		sc_data_msg;
367 
368 	int				sc_rss_ind_size;
369 	uint32_t			sc_rss_hash; /* setting, NDIS_HASH_ */
370 	uint32_t			sc_rss_hcap; /* caps, NDIS_HASH_ */
371 	struct ndis_rssprm_toeplitz	sc_rss;
372 
373 	/* Rx ring */
374 	uint8_t				*sc_rx_ring;
375 	int				sc_rx_size;
376 	uint32_t			sc_rx_hndl;
377 	struct hyperv_dma		sc_rx_dma;
378 	struct hvn_rx_ring		*sc_rxr;
379 	int				sc_nrxr;
380 	int				sc_nrxr_inuse;
381 
382 	/* Tx ring */
383 	struct hvn_tx_ring		*sc_txr;
384 	int				sc_ntxr;
385 	int				sc_ntxr_inuse;
386 
387 	/* chimney sending buffers */
388 	uint8_t				*sc_chim;
389 	uint32_t			sc_chim_hndl;
390 	struct hyperv_dma		sc_chim_dma;
391 	kmutex_t			sc_chim_bmap_lock;
392 	u_long				*sc_chim_bmap;
393 	int				sc_chim_bmap_cnt;
394 	int				sc_chim_cnt;
395 	int				sc_chim_szmax;
396 
397 	/* Packet transmission aggregation user settings. */
398 	int				sc_agg_size;
399 	int				sc_agg_pkts;
400 };
401 
402 #define SC2IFP(_sc_)	(&(_sc_)->sc_ec.ec_if)
403 #define IFP2SC(_ifp_)	((_ifp_)->if_softc)
404 
405 #ifndef HVN_TX_PROCESS_LIMIT_DEFAULT
406 #define HVN_TX_PROCESS_LIMIT_DEFAULT		128
407 #endif
408 #ifndef HVN_RX_PROCESS_LIMIT_DEFAULT
409 #define HVN_RX_PROCESS_LIMIT_DEFAULT		128
410 #endif
411 #ifndef HVN_TX_INTR_PROCESS_LIMIT_DEFAULT
412 #define HVN_TX_INTR_PROCESS_LIMIT_DEFAULT	256
413 #endif
414 #ifndef HVN_RX_INTR_PROCESS_LIMIT_DEFAULT
415 #define HVN_RX_INTR_PROCESS_LIMIT_DEFAULT	256
416 #endif
417 
418 /*
419  * See hvn_set_hlen().
420  *
421  * This value is for Azure.  For Hyper-V, set this above
422  * 65536 to disable UDP datagram checksum fixup.
423  */
424 #ifndef HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT
425 #define HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT	1420
426 #endif
427 static int hvn_udpcs_fixup_mtu = HVN_UDP_CKSUM_FIXUP_MTU_DEFAULT;
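
/*
 * Added cross-reference: the fixup itself lives in the M_CSUM_UDPv4
 * branch of hvn_set_hlen().  UDP datagrams longer than this value
 * (plus the L2 header) without IP_DF set get a software-computed
 * checksum instead of relying on host offload.
 */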
428 
429 /* Limit chimney send size */
430 static int hvn_tx_chimney_size = 0;
431 
432 /* # of channels to use; each channel has one RX ring and one TX ring */
433 #ifndef HVN_CHANNEL_COUNT_DEFAULT
434 #define HVN_CHANNEL_COUNT_DEFAULT	0
435 #endif
436 static int hvn_channel_cnt = HVN_CHANNEL_COUNT_DEFAULT;
437 
438 /* # of transmit rings to use */
439 #ifndef HVN_TX_RING_COUNT_DEFAULT
440 #define HVN_TX_RING_COUNT_DEFAULT	0
441 #endif
442 static int hvn_tx_ring_cnt = HVN_TX_RING_COUNT_DEFAULT;
443 
444 /* Packet transmission aggregation size limit */
445 static int hvn_tx_agg_size = -1;
446 
447 /* Packet transmission aggregation count limit */
448 static int hvn_tx_agg_pkts = -1;
449 
450 static int	hvn_match(device_t, cfdata_t, void *);
451 static void	hvn_attach(device_t, device_t, void *);
452 static int	hvn_detach(device_t, int);
453 
454 CFATTACH_DECL_NEW(hvn, sizeof(struct hvn_softc),
455     hvn_match, hvn_attach, hvn_detach, NULL);
456 
457 static int	hvn_ioctl(struct ifnet *, u_long, void *);
458 static int	hvn_media_change(struct ifnet *);
459 static void	hvn_media_status(struct ifnet *, struct ifmediareq *);
460 static void	hvn_link_task(void *);
461 static void	hvn_link_event(struct hvn_softc *, uint32_t);
462 static void	hvn_link_netchg_tmout_cb(void *);
463 static int	hvn_init(struct ifnet *);
464 static int	hvn_init_locked(struct ifnet *);
465 static void	hvn_stop(struct ifnet *, int);
466 static void	hvn_stop_locked(struct ifnet *);
467 static void	hvn_start(struct ifnet *);
468 static int	hvn_transmit(struct ifnet *, struct mbuf *);
469 static void	hvn_deferred_transmit(void *);
470 static int	hvn_flush_txagg(struct hvn_tx_ring *);
471 static int	hvn_encap(struct hvn_tx_ring *, struct hvn_tx_desc *,
472 		    struct mbuf *, int);
473 static int	hvn_txpkt(struct hvn_tx_ring *, struct hvn_tx_desc *);
474 static void	hvn_txeof(struct hvn_tx_ring *, uint64_t);
475 static int	hvn_rx_ring_create(struct hvn_softc *, int);
476 static int	hvn_rx_ring_destroy(struct hvn_softc *);
477 static void	hvn_fixup_rx_data(struct hvn_softc *);
478 static int	hvn_tx_ring_create(struct hvn_softc *, int);
479 static void	hvn_tx_ring_destroy(struct hvn_softc *);
480 static void	hvn_set_chim_size(struct hvn_softc *, int);
481 static uint32_t	hvn_chim_alloc(struct hvn_softc *);
482 static void	hvn_chim_free(struct hvn_softc *, uint32_t);
483 static void	hvn_fixup_tx_data(struct hvn_softc *);
484 static struct mbuf *
485 		hvn_set_hlen(struct mbuf *, int *);
486 static int	hvn_txd_peek(struct hvn_tx_ring *);
487 static struct hvn_tx_desc *
488 		hvn_txd_get(struct hvn_tx_ring *);
489 static void	hvn_txd_put(struct hvn_tx_ring *, struct hvn_tx_desc *);
490 static void	hvn_txd_gc(struct hvn_tx_ring *, struct hvn_tx_desc *);
491 static void	hvn_txd_hold(struct hvn_tx_desc *);
492 static void	hvn_txd_agg(struct hvn_tx_desc *, struct hvn_tx_desc *);
493 static int	hvn_tx_ring_pending(struct hvn_tx_ring *);
494 static void	hvn_tx_ring_qflush(struct hvn_softc *, struct hvn_tx_ring *);
495 static int	hvn_get_rsscaps(struct hvn_softc *, int *);
496 static int	hvn_set_rss(struct hvn_softc *, uint16_t);
497 static void	hvn_fixup_rss_ind(struct hvn_softc *);
498 static int	hvn_get_hwcaps(struct hvn_softc *, struct ndis_offload *);
499 static int	hvn_set_capabilities(struct hvn_softc *, int);
500 static int	hvn_get_lladdr(struct hvn_softc *, uint8_t *);
501 static void	hvn_update_link_status(struct hvn_softc *);
502 static int	hvn_get_mtu(struct hvn_softc *, uint32_t *);
503 static int	hvn_channel_attach(struct hvn_softc *, struct vmbus_channel *);
504 static void	hvn_channel_detach(struct hvn_softc *, struct vmbus_channel *);
505 static void	hvn_channel_detach_all(struct hvn_softc *);
506 static int	hvn_subchannel_attach(struct hvn_softc *);
507 static int	hvn_synth_alloc_subchannels(struct hvn_softc *, int *);
508 static int	hvn_synth_attachable(const struct hvn_softc *);
509 static int	hvn_synth_attach(struct hvn_softc *, int);
510 static void	hvn_synth_detach(struct hvn_softc *);
511 static void	hvn_set_ring_inuse(struct hvn_softc *, int);
512 static void	hvn_disable_rx(struct hvn_softc *);
513 static void	hvn_drain_rxtx(struct hvn_softc *, int);
514 static void	hvn_suspend_data(struct hvn_softc *);
515 static void	hvn_suspend_mgmt(struct hvn_softc *);
516 static void	hvn_suspend(struct hvn_softc *) __unused;
517 static void	hvn_resume_tx(struct hvn_softc *, int);
518 static void	hvn_resume_data(struct hvn_softc *);
519 static void	hvn_resume_mgmt(struct hvn_softc *);
520 static void	hvn_resume(struct hvn_softc *) __unused;
521 static void	hvn_init_sysctls(struct hvn_softc *);
522 
523 /* NVSP */
524 static int	hvn_nvs_init(struct hvn_softc *);
525 static void	hvn_nvs_destroy(struct hvn_softc *);
526 static int	hvn_nvs_attach(struct hvn_softc *, int);
527 static int	hvn_nvs_connect_rxbuf(struct hvn_softc *);
528 static int	hvn_nvs_disconnect_rxbuf(struct hvn_softc *);
529 static int	hvn_nvs_connect_chim(struct hvn_softc *);
530 static int	hvn_nvs_disconnect_chim(struct hvn_softc *);
531 static void	hvn_handle_ring_work(struct work *, void *);
532 static void	hvn_nvs_softintr(void *);
533 static void	hvn_nvs_intr(void *);
534 static void	hvn_nvs_intr1(struct hvn_rx_ring *, int, int);
535 static int	hvn_nvs_cmd(struct hvn_softc *, void *, size_t, uint64_t,
536 		    u_int);
537 static int	hvn_nvs_ack(struct hvn_rx_ring *, uint64_t);
538 static void	hvn_nvs_detach(struct hvn_softc *);
539 static int	hvn_nvs_alloc_subchannels(struct hvn_softc *, int *);
540 
541 /* RNDIS */
542 static int	hvn_rndis_init(struct hvn_softc *);
543 static void	hvn_rndis_destroy(struct hvn_softc *);
544 static int	hvn_rndis_attach(struct hvn_softc *, int);
545 static int	hvn_rndis_cmd(struct hvn_softc *, struct rndis_cmd *, u_int);
546 static int	hvn_rndis_input(struct hvn_rx_ring *, uint64_t, void *);
547 static int	hvn_rxeof(struct hvn_rx_ring *, uint8_t *, uint32_t);
548 static void	hvn_rndis_complete(struct hvn_softc *, uint8_t *, uint32_t);
549 static int	hvn_rndis_output_sgl(struct hvn_tx_ring *,
550 		    struct hvn_tx_desc *);
551 static int	hvn_rndis_output_chim(struct hvn_tx_ring *,
552 		    struct hvn_tx_desc *);
553 static void	hvn_rndis_status(struct hvn_softc *, uint8_t *, uint32_t);
554 static int	hvn_rndis_query(struct hvn_softc *, uint32_t, void *, size_t *);
555 static int	hvn_rndis_query2(struct hvn_softc *, uint32_t, const void *,
556 		    size_t, void *, size_t *, size_t);
557 static int	hvn_rndis_set(struct hvn_softc *, uint32_t, void *, size_t);
558 static int	hvn_rndis_open(struct hvn_softc *);
559 static int	hvn_rndis_close(struct hvn_softc *);
560 static void	hvn_rndis_detach(struct hvn_softc *);
561 
562 static int
563 hvn_match(device_t parent, cfdata_t match, void *aux)
564 {
565 	struct vmbus_attach_args *aa = aux;
566 
567 	if (memcmp(aa->aa_type, &hyperv_guid_network, sizeof(*aa->aa_type)))
568 		return 0;
569 	return 1;
570 }
571 
572 static void
573 hvn_attach(device_t parent, device_t self, void *aux)
574 {
575 	struct hvn_softc *sc = device_private(self);
576 	struct vmbus_attach_args *aa = aux;
577 	struct ifnet *ifp = SC2IFP(sc);
578 	char xnamebuf[32];
579 	uint8_t enaddr[ETHER_ADDR_LEN];
580 	uint32_t mtu;
581 	int tx_ring_cnt, ring_cnt;
582 	int error;
583 
584 	sc->sc_dev = self;
585 	sc->sc_vmbus = (struct vmbus_softc *)device_private(parent);
586 	sc->sc_prichan = aa->aa_chan;
587 	sc->sc_dmat = sc->sc_vmbus->sc_dmat;
588 
589 	aprint_naive("\n");
590 	aprint_normal(": Hyper-V NetVSC\n");
591 
592 	sc->sc_txrx_workqueue = true;
593 	sc->sc_tx_process_limit = HVN_TX_PROCESS_LIMIT_DEFAULT;
594 	sc->sc_rx_process_limit = HVN_RX_PROCESS_LIMIT_DEFAULT;
595 	sc->sc_tx_intr_process_limit = HVN_TX_INTR_PROCESS_LIMIT_DEFAULT;
596 	sc->sc_rx_intr_process_limit = HVN_RX_INTR_PROCESS_LIMIT_DEFAULT;
597 	sc->sc_agg_size = hvn_tx_agg_size;
598 	sc->sc_agg_pkts = hvn_tx_agg_pkts;
599 
600 	mutex_init(&sc->sc_core_lock, MUTEX_DEFAULT, IPL_SOFTNET);
601 	mutex_init(&sc->sc_link_lock, MUTEX_DEFAULT, IPL_NET);
602 	cv_init(&sc->sc_link_cv, "hvnknkcv");
603 	callout_init(&sc->sc_link_tmout, CALLOUT_MPSAFE);
604 	callout_setfunc(&sc->sc_link_tmout, hvn_link_netchg_tmout_cb, sc);
605 	if (kthread_create(PRI_NONE, KTHREAD_MUSTJOIN | KTHREAD_MPSAFE, NULL,
606 	    hvn_link_task, sc, &sc->sc_link_lwp, "%slink",
607 	    device_xname(self))) {
608 		aprint_error_dev(self, "failed to create link thread\n");
609 		return;
610 	}
611 
612 	snprintf(xnamebuf, sizeof(xnamebuf), "%srxtx", device_xname(self));
613 	if (workqueue_create(&sc->sc_wq, xnamebuf, hvn_handle_ring_work,
614 	    sc, HVN_WORKQUEUE_PRI, IPL_NET, WQ_PERCPU | WQ_MPSAFE)) {
615 		aprint_error_dev(self, "failed to create workqueue\n");
616 		sc->sc_wq = NULL;
617 		goto destroy_link_thread;
618 	}
619 
620 	ring_cnt = hvn_channel_cnt;
621 	if (ring_cnt <= 0) {
622 		ring_cnt = ncpu;
623 		if (ring_cnt > HVN_CHANNEL_MAX_COUNT_DEFAULT)
624 			ring_cnt = HVN_CHANNEL_MAX_COUNT_DEFAULT;
625 	} else if (ring_cnt > ncpu)
626 		ring_cnt = ncpu;
627 
628 	tx_ring_cnt = hvn_tx_ring_cnt;
629 	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
630 		tx_ring_cnt = ring_cnt;
631 
632 	if (hvn_tx_ring_create(sc, tx_ring_cnt)) {
633 		aprint_error_dev(self, "failed to create Tx ring\n");
634 		goto destroy_wq;
635 	}
636 
637 	if (hvn_rx_ring_create(sc, ring_cnt)) {
638 		aprint_error_dev(self, "failed to create Rx ring\n");
639 		goto destroy_tx_ring;
640 	}
641 
642 	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
643 	ifp->if_softc = sc;
644 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
645 	ifp->if_extflags = IFEF_MPSAFE;
646 	ifp->if_ioctl = hvn_ioctl;
647 	ifp->if_start = hvn_start;
648 	ifp->if_transmit = hvn_transmit;
649 	ifp->if_init = hvn_init;
650 	ifp->if_stop = hvn_stop;
651 	ifp->if_baudrate = IF_Gbps(10);
652 
653 	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(HVN_TX_DESC - 1, IFQ_MAXLEN));
654 	IFQ_SET_READY(&ifp->if_snd);
655 
656 	/* Initialize ifmedia structures. */
657 	sc->sc_ec.ec_ifmedia = &sc->sc_media;
658 	ifmedia_init_with_lock(&sc->sc_media, IFM_IMASK,
659 	    hvn_media_change, hvn_media_status, &sc->sc_core_lock);
660 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
661 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T | IFM_FDX, 0, NULL);
662 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T, 0, NULL);
663 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
664 
665 	if_initialize(ifp);
666 	sc->sc_ipq = if_percpuq_create(ifp);
667 	if_deferred_start_init(ifp, NULL);
668 
669 	hvn_nvs_init(sc);
670 	hvn_rndis_init(sc);
671 	if (hvn_synth_attach(sc, ETHERMTU)) {
672 		aprint_error_dev(self, "failed to attach synth\n");
673 		goto destroy_if_percpuq;
674 	}
675 
676 	aprint_normal_dev(self, "NVS %d.%d NDIS %d.%d\n",
677 	    sc->sc_proto >> 16, sc->sc_proto & 0xffff,
678 	    sc->sc_ndisver >> 16, sc->sc_ndisver & 0xffff);
679 
680 	if (hvn_get_lladdr(sc, enaddr)) {
681 		aprint_error_dev(self,
682 		    "failed to obtain an ethernet address\n");
683 		goto detach_synth;
684 	}
685 	aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(enaddr));
686 
687 	/*
688 	 * Fix up TX/RX state after the synthetic parts are attached.
689 	 */
690 	hvn_fixup_tx_data(sc);
691 	hvn_fixup_rx_data(sc);
692 
693 	ifp->if_capabilities |= sc->sc_txr[0].txr_caps_assist &
694 		(IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
695 		 IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
696 		 IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
697 		 IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
698 		 IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx);
699 	/* XXX TSOv4, TSOv6 */
700 	if (sc->sc_caps & HVN_CAPS_VLAN) {
701 		/* XXX not sure about VLAN_MTU. */
702 		sc->sc_ec.ec_capabilities |= ETHERCAP_VLAN_HWTAGGING;
703 		sc->sc_ec.ec_capabilities |= ETHERCAP_VLAN_MTU;
704 	}
705 	sc->sc_ec.ec_capabilities |= ETHERCAP_JUMBO_MTU;
706 
707 	ether_ifattach(ifp, enaddr);
708 
709 	error = hvn_get_mtu(sc, &mtu);
710 	if (error)
711 		mtu = ETHERMTU;
712 	if (mtu < ETHERMTU) {
713 		DPRINTF("%s: fixup mtu %u -> %u\n", device_xname(sc->sc_dev),
714 		    ETHERMTU, mtu);
715 		ifp->if_mtu = mtu;
716 	}
717 
718 	if_register(ifp);
719 
720 	/*
721 	 * Kick off link status check.
722 	 */
723 	hvn_link_event(sc, HVN_LINK_EV_STATE_CHANGE);
724 
725 	hvn_init_sysctls(sc);
726 
727 	if (pmf_device_register(self, NULL, NULL))
728 		pmf_class_network_register(self, ifp);
729 	else
730 		aprint_error_dev(self, "couldn't establish power handler\n");
731 
732 	SET(sc->sc_flags, HVN_SCF_ATTACHED);
733 	return;
734 
735 detach_synth:
736 	hvn_synth_detach(sc);
737 	hvn_rndis_destroy(sc);
738 	hvn_nvs_destroy(sc);
739 destroy_if_percpuq:
740 	if_percpuq_destroy(sc->sc_ipq);
741 	hvn_rx_ring_destroy(sc);
742 destroy_tx_ring:
743 	hvn_tx_ring_destroy(sc);
744 destroy_wq:
745 	workqueue_destroy(sc->sc_wq);
746 	sc->sc_wq = NULL;
747 destroy_link_thread:
748 	hvn_link_event(sc, HVN_LINK_EV_EXIT_THREAD);
749 	kthread_join(sc->sc_link_lwp);
750 	callout_destroy(&sc->sc_link_tmout);
751 	cv_destroy(&sc->sc_link_cv);
752 	mutex_destroy(&sc->sc_link_lock);
753 	mutex_destroy(&sc->sc_core_lock);
754 }
755 
756 static int
757 hvn_detach(device_t self, int flags)
758 {
759 	struct hvn_softc *sc = device_private(self);
760 	struct ifnet *ifp = SC2IFP(sc);
761 
762 	if (!ISSET(sc->sc_flags, HVN_SCF_ATTACHED))
763 		return 0;
764 
765 	if (vmbus_channel_is_revoked(sc->sc_prichan))
766 		SET(sc->sc_flags, HVN_SCF_REVOKED);
767 
768 	pmf_device_deregister(self);
769 
770 	mutex_enter(&sc->sc_core_lock);
771 
772 	if (ifp->if_flags & IFF_RUNNING)
773 		hvn_stop_locked(ifp);
774 	/*
775 	 * NOTE:
776 	 * hvn_stop() only suspends data transfer, so the management
777 	 * side has to be suspended manually here.
778 	 */
779 	hvn_suspend_mgmt(sc);
780 
781 	ether_ifdetach(ifp);
782 	if_detach(ifp);
783 	if_percpuq_destroy(sc->sc_ipq);
784 
785 	hvn_link_event(sc, HVN_LINK_EV_EXIT_THREAD);
786 	kthread_join(sc->sc_link_lwp);
787 	callout_halt(&sc->sc_link_tmout, NULL);
788 
789 	hvn_synth_detach(sc);
790 	hvn_rndis_destroy(sc);
791 	hvn_nvs_destroy(sc);
792 
793 	mutex_exit(&sc->sc_core_lock);
794 
795 	hvn_rx_ring_destroy(sc);
796 	hvn_tx_ring_destroy(sc);
797 
798 	workqueue_destroy(sc->sc_wq);
799 	callout_destroy(&sc->sc_link_tmout);
800 	cv_destroy(&sc->sc_link_cv);
801 	mutex_destroy(&sc->sc_link_lock);
802 	mutex_destroy(&sc->sc_core_lock);
803 
804 	sysctl_teardown(&sc->sc_sysctllog);
805 
806 	return 0;
807 }
808 
809 static int
810 hvn_ioctl(struct ifnet *ifp, u_long command, void *data)
811 {
812 	struct hvn_softc *sc = IFP2SC(ifp);
813 	struct ifreq *ifr = (struct ifreq *)data;
814 	uint32_t mtu;
815 	int s, error = 0;
816 
817 	switch (command) {
818 	case SIOCSIFMTU:
819 		if (ifr->ifr_mtu < HVN_MTU_MIN || ifr->ifr_mtu > HVN_MTU_MAX) {
820 			error = EINVAL;
821 			break;
822 		}
823 
824 		mutex_enter(&sc->sc_core_lock);
825 
826 		if (!(sc->sc_caps & HVN_CAPS_MTU)) {
827 			/* Can't change MTU */
828 			mutex_exit(&sc->sc_core_lock);
829 			error = EOPNOTSUPP;
830 			break;
831 		}
832 
833 		if (ifp->if_mtu == ifr->ifr_mtu) {
834 			mutex_exit(&sc->sc_core_lock);
835 			break;
836 		}
837 
838 		/*
839 		 * Suspend this interface before the synthetic parts
840 		 * are torn down.
841 		 */
842 		hvn_suspend(sc);
843 
844 		/*
845 		 * Detach the synthetic parts, i.e. NVS and RNDIS.
846 		 */
847 		hvn_synth_detach(sc);
848 
849 		/*
850 		 * Reattach the synthetic parts, i.e. NVS and RNDIS,
851 		 * with the new MTU setting.
852 		 */
853 		error = hvn_synth_attach(sc, ifr->ifr_mtu);
854 		if (error) {
855 			mutex_exit(&sc->sc_core_lock);
856 			break;
857 		}
858 
859 		error = hvn_get_mtu(sc, &mtu);
860 		if (error)
861 			mtu = ifr->ifr_mtu;
862 		DPRINTF("%s: RNDIS mtu=%d\n", device_xname(sc->sc_dev), mtu);
863 
864 		/*
865 		 * Commit the requested MTU, after the synthetic parts
866 		 * have been successfully attached.
867 		 */
868 		if (mtu >= ifr->ifr_mtu) {
869 			mtu = ifr->ifr_mtu;
870 		} else {
871 			DPRINTF("%s: fixup mtu %d -> %u\n",
872 			    device_xname(sc->sc_dev), ifr->ifr_mtu, mtu);
873 		}
874 		ifp->if_mtu = mtu;
875 
876 		/*
877 		 * Synthetic parts' reattach may change the chimney
878 		 * sending size; update it.
879 		 */
880 		if (sc->sc_txr[0].txr_chim_size > sc->sc_chim_szmax)
881 			hvn_set_chim_size(sc, sc->sc_chim_szmax);
882 
883 		/*
884 		 * All done!  Resume the interface now.
885 		 */
886 		hvn_resume(sc);
887 
888 		mutex_exit(&sc->sc_core_lock);
889 		break;
890 	default:
891 		s = splnet();
892 		if (command == SIOCGIFMEDIA || command == SIOCSIFMEDIA)
893 			error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, command);
894 		else
895 			error = ether_ioctl(ifp, command, data);
896 		splx(s);
897 		if (error == ENETRESET) {
898 			mutex_enter(&sc->sc_core_lock);
899 			if (ifp->if_flags & IFF_RUNNING)
900 				hvn_init_locked(ifp);
901 			mutex_exit(&sc->sc_core_lock);
902 			error = 0;
903 		}
904 		break;
905 	}
906 
907 	return error;
908 }
909 
910 static int
911 hvn_media_change(struct ifnet *ifp)
912 {
913 	struct hvn_softc *sc = IFP2SC(ifp);
914 	struct ifmedia *ifm = &sc->sc_media;
915 
916 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
917 		return EINVAL;
918 
919 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
920 	case IFM_AUTO:
921 		break;
922 	default:
923 		device_printf(sc->sc_dev, "Only auto media type\n");
924 		return EINVAL;
925 	}
926 	return 0;
927 }
928 
929 static void
930 hvn_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
931 {
932 	struct hvn_softc *sc = IFP2SC(ifp);
933 
934 	ifmr->ifm_status = IFM_AVALID;
935 	ifmr->ifm_active = IFM_ETHER;
936 
937 	if (sc->sc_link_state != LINK_STATE_UP) {
938 		ifmr->ifm_active |= IFM_NONE;
939 		return;
940 	}
941 
942 	ifmr->ifm_status |= IFM_ACTIVE;
943 	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
944 }
945 
946 static void
947 hvn_link_task(void *arg)
948 {
949 	struct hvn_softc *sc = arg;
950 	struct ifnet *ifp = SC2IFP(sc);
951 	uint32_t event;
952 	int old_link_state;
953 
954 	mutex_enter(&sc->sc_link_lock);
955 	sc->sc_link_onproc = false;
956 	for (;;) {
957 		if (sc->sc_link_ev == 0) {
958 			cv_wait(&sc->sc_link_cv, &sc->sc_link_lock);
959 			continue;
960 		}
961 
962 		sc->sc_link_onproc = true;
963 		event = sc->sc_link_ev;
964 		sc->sc_link_ev = 0;
965 		mutex_exit(&sc->sc_link_lock);
966 
967 		if (event & HVN_LINK_EV_EXIT_THREAD)
968 			break;
969 
970 		if (sc->sc_link_suspend)
971 			goto next;
972 
973 		if (event & HVN_LINK_EV_RESUME_NETWORK) {
974 			if (sc->sc_link_pending)
975 				event |= HVN_LINK_EV_NETWORK_CHANGE;
976 			else
977 				event |= HVN_LINK_EV_STATE_CHANGE;
978 		}
979 
980 		if (event & HVN_LINK_EV_NETWORK_CHANGE) {
981 			/* Prevent any link status checks from running. */
982 			sc->sc_link_pending = true;
983 
984 			/*
985 			 * Fake up a [link down --> link up] state change;
986 			 * a 5 second delay is used, which closely simulates
987 			 * the miibus reaction to a link down event.
988 			 */
989 			old_link_state = sc->sc_link_state;
990 			sc->sc_link_state = LINK_STATE_DOWN;
991 			if (old_link_state != sc->sc_link_state) {
992 				if_link_state_change(ifp, LINK_STATE_DOWN);
993 			}
994 #if defined(HVN_LINK_STATE_CHANGE_DELAY) && HVN_LINK_STATE_CHANGE_DELAY > 0
995 			callout_schedule(&sc->sc_link_tmout,
996 			    mstohz(HVN_LINK_STATE_CHANGE_DELAY));
997 #else
998 			hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE_TMOUT);
999 #endif
1000 		} else if (event & HVN_LINK_EV_NETWORK_CHANGE_TMOUT) {
1001 			/* Re-allow link status checks. */
1002 			sc->sc_link_pending = false;
1003 			hvn_update_link_status(sc);
1004 		} else if (event & HVN_LINK_EV_STATE_CHANGE) {
1005 			if (!sc->sc_link_pending)
1006 				hvn_update_link_status(sc);
1007 		}
1008  next:
1009 		mutex_enter(&sc->sc_link_lock);
1010 		sc->sc_link_onproc = false;
1011 	}
1012 
1013 	mutex_enter(&sc->sc_link_lock);
1014 	sc->sc_link_onproc = false;
1015 	mutex_exit(&sc->sc_link_lock);
1016 
1017 	kthread_exit(0);
1018 }
1019 
1020 static void
1021 hvn_link_event(struct hvn_softc *sc, uint32_t ev)
1022 {
1023 
1024 	mutex_enter(&sc->sc_link_lock);
1025 	SET(sc->sc_link_ev, ev);
1026 	cv_signal(&sc->sc_link_cv);
1027 	mutex_exit(&sc->sc_link_lock);
1028 }
1029 
1030 static void
1031 hvn_link_netchg_tmout_cb(void *arg)
1032 {
1033 	struct hvn_softc *sc = arg;
1034 
1035 	hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE_TMOUT);
1036 }
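
/*
 * Added summary of the link event machinery above: hvn_link_event()
 * sets an event bit and wakes the link kthread running hvn_link_task().
 * A NETWORK_CHANGE event forces the link down and arms sc_link_tmout;
 * when HVN_LINK_STATE_CHANGE_DELAY expires, the resulting
 * NETWORK_CHANGE_TMOUT event re-runs hvn_update_link_status().
 */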
1037 
1038 static int
1039 hvn_init(struct ifnet *ifp)
1040 {
1041 	struct hvn_softc *sc = IFP2SC(ifp);
1042 	int error;
1043 
1044 	mutex_enter(&sc->sc_core_lock);
1045 	error = hvn_init_locked(ifp);
1046 	mutex_exit(&sc->sc_core_lock);
1047 
1048 	return error;
1049 }
1050 
1051 static int
1052 hvn_init_locked(struct ifnet *ifp)
1053 {
1054 	struct hvn_softc *sc = IFP2SC(ifp);
1055 	int error;
1056 
1057 	KASSERT(mutex_owned(&sc->sc_core_lock));
1058 
1059 	hvn_stop_locked(ifp);
1060 
1061 	error = hvn_rndis_open(sc);
1062 	if (error)
1063 		return error;
1064 
1065 	/* Clear TX 'suspended' bit. */
1066 	hvn_resume_tx(sc, sc->sc_ntxr_inuse);
1067 
1068 	/* Everything is ready; unleash! */
1069 	ifp->if_flags |= IFF_RUNNING;
1070 
1071 	return 0;
1072 }
1073 
1074 static void
1075 hvn_stop(struct ifnet *ifp, int disable)
1076 {
1077 	struct hvn_softc *sc = IFP2SC(ifp);
1078 
1079 	mutex_enter(&sc->sc_core_lock);
1080 	hvn_stop_locked(ifp);
1081 	mutex_exit(&sc->sc_core_lock);
1082 }
1083 
1084 static void
1085 hvn_stop_locked(struct ifnet *ifp)
1086 {
1087 	struct hvn_softc *sc = IFP2SC(ifp);
1088 	int i;
1089 
1090 	KASSERT(mutex_owned(&sc->sc_core_lock));
1091 
1092 	/* Clear RUNNING bit ASAP. */
1093 	ifp->if_flags &= ~IFF_RUNNING;
1094 
1095 	/* Suspend data transfers. */
1096 	hvn_suspend_data(sc);
1097 
1098 	/* Clear OACTIVE state. */
1099 	for (i = 0; i < sc->sc_ntxr_inuse; i++)
1100 		sc->sc_txr[i].txr_oactive = 0;
1101 }
1102 
1103 static void
1104 hvn_transmit_common(struct ifnet *ifp, struct hvn_tx_ring *txr,
1105     bool is_transmit)
1106 {
1107 	struct hvn_tx_desc *txd;
1108 	struct mbuf *m;
1109 	int l2hlen = ETHER_HDR_LEN;
1110 
1111 	KASSERT(mutex_owned(&txr->txr_lock));
1112 
1113 	if (!(ifp->if_flags & IFF_RUNNING))
1114 		return;
1115 	if (txr->txr_oactive)
1116 		return;
1117 	if (txr->txr_suspended)
1118 		return;
1119 
1120 	for (;;) {
1121 		if (!hvn_txd_peek(txr)) {
1122 			/* transient */
1123 			txr->txr_oactive = 1;
1124 			txr->txr_evnodesc.ev_count++;
1125 			break;
1126 		}
1127 
1128 		if (is_transmit)
1129 			m = pcq_get(txr->txr_interq);
1130 		else
1131 			IFQ_DEQUEUE(&ifp->if_snd, m);
1132 		if (m == NULL)
1133 			break;
1134 
1135 #if defined(INET) || defined(INET6)
1136 		if (m->m_pkthdr.csum_flags &
1137 		    (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TCPv6|M_CSUM_UDPv6)) {
1138 			m = hvn_set_hlen(m, &l2hlen);
1139 			if (__predict_false(m == NULL)) {
1140 				if_statinc(ifp, if_oerrors);
1141 				continue;
1142 			}
1143 		}
1144 #endif
1145 
1146 		txd = hvn_txd_get(txr);
1147 		if (hvn_encap(txr, txd, m, l2hlen)) {
1148 			/* the chain is too large */
1149 			if_statinc(ifp, if_oerrors);
1150 			hvn_txd_put(txr, txd);
1151 			m_freem(m);
1152 			continue;
1153 		}
1154 
1155 		if (txr->txr_agg_pktleft == 0) {
1156 			if (txr->txr_agg_txd != NULL) {
1157 				hvn_flush_txagg(txr);
1158 			} else {
1159 				if (hvn_txpkt(txr, txd)) {
1160 					/* txd is freed, but m is not. */
1161 					m_freem(m);
1162 					if_statinc(ifp, if_oerrors);
1163 				}
1164 			}
1165 		}
1166 	}
1167 
1168 	/* Flush any pending aggregated transmission. */
1169 	if (txr->txr_agg_txd != NULL)
1170 		hvn_flush_txagg(txr);
1171 }
1172 
1173 static void
1174 hvn_start(struct ifnet *ifp)
1175 {
1176 	struct hvn_softc *sc = IFP2SC(ifp);
1177 	struct hvn_tx_ring *txr = &sc->sc_txr[0];
1178 
1179 	mutex_enter(&txr->txr_lock);
1180 	hvn_transmit_common(ifp, txr, false);
1181 	mutex_exit(&txr->txr_lock);
1182 }
1183 
1184 static int
1185 hvn_select_txqueue(struct ifnet *ifp, struct mbuf *m __unused)
1186 {
1187 	struct hvn_softc *sc = IFP2SC(ifp);
1188 	u_int cpu;
1189 
1190 	cpu = cpu_index(curcpu());
1191 
1192 	return cpu % sc->sc_ntxr_inuse;
1193 }
1194 
1195 static int
1196 hvn_transmit(struct ifnet *ifp, struct mbuf *m)
1197 {
1198 	struct hvn_softc *sc = IFP2SC(ifp);
1199 	struct hvn_tx_ring *txr;
1200 	int qid;
1201 
1202 	qid = hvn_select_txqueue(ifp, m);
1203 	txr = &sc->sc_txr[qid];
1204 
1205 	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
1206 		mutex_enter(&txr->txr_lock);
1207 		txr->txr_evpcqdrop.ev_count++;
1208 		mutex_exit(&txr->txr_lock);
1209 		m_freem(m);
1210 		return ENOBUFS;
1211 	}
1212 
1213 	kpreempt_disable();
1214 	softint_schedule(txr->txr_si);
1215 	kpreempt_enable();
1216 	return 0;
1217 }
1218 
1219 static void
1220 hvn_deferred_transmit(void *arg)
1221 {
1222 	struct hvn_tx_ring *txr = arg;
1223 	struct hvn_softc *sc = txr->txr_softc;
1224 	struct ifnet *ifp = SC2IFP(sc);
1225 
1226 	mutex_enter(&txr->txr_lock);
1227 	txr->txr_evtransmitdefer.ev_count++;
1228 	hvn_transmit_common(ifp, txr, true);
1229 	mutex_exit(&txr->txr_lock);
1230 }
1231 
1232 static inline char *
1233 hvn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
1234     size_t datalen, uint32_t type)
1235 {
1236 	struct rndis_pktinfo *pi;
1237 	size_t pi_size = sizeof(*pi) + datalen;
1238 	char *cp;
1239 
1240 	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <=
1241 	    pktsize);
1242 
1243 	cp = (char *)pkt + pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
1244 	pi = (struct rndis_pktinfo *)cp;
1245 	pi->rm_size = pi_size;
1246 	pi->rm_type = type;
1247 	pi->rm_pktinfooffset = sizeof(*pi);
1248 	pkt->rm_pktinfolen += pi_size;
1249 	pkt->rm_dataoffset += pi_size;
1250 	pkt->rm_len += pi_size;
1251 
1252 	return (char *)pi->rm_data;
1253 }
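
/*
 * Added note: each appended record grows rm_pktinfolen, rm_dataoffset
 * and rm_len by the record size, so the packet payload (copied at
 * offset rm_pktinfooffset + rm_pktinfolen by hvn_encap()) always lands
 * after the last pktinfo record.
 */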
1254 
1255 static struct mbuf *
1256 hvn_pullup_hdr(struct mbuf *m, int len)
1257 {
1258 	struct mbuf *mn;
1259 
1260 	if (__predict_false(m->m_len < len)) {
1261 		mn = m_pullup(m, len);
1262 		if (mn == NULL)
1263 			return NULL;
1264 		m = mn;
1265 	}
1266 	return m;
1267 }
1268 
1269 /*
1270  * NOTE: If this function failed, the m would be freed.
1271  */
1272 static struct mbuf *
1273 hvn_set_hlen(struct mbuf *m, int *l2hlenp)
1274 {
1275 	const struct ether_header *eh;
1276 	int l2hlen, off;
1277 
1278 	m = hvn_pullup_hdr(m, sizeof(*eh));
1279 	if (m == NULL)
1280 		return NULL;
1281 
1282 	eh = mtod(m, const struct ether_header *);
1283 	if (eh->ether_type == htons(ETHERTYPE_VLAN))
1284 		l2hlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1285 	else
1286 		l2hlen = ETHER_HDR_LEN;
1287 
1288 #if defined(INET)
1289 	if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4 | M_CSUM_UDPv4)) {
1290 		const struct ip *ip;
1291 
1292 		off = l2hlen + sizeof(*ip);
1293 		m = hvn_pullup_hdr(m, off);
1294 		if (m == NULL)
1295 			return NULL;
1296 
1297 		ip = (struct ip *)((mtod(m, uint8_t *)) + l2hlen);
1298 
1299 		/*
1300 		 * UDP checksum offload does not work in Azure if the
1301 		 * following conditions are met:
1302 		 * - sizeof(IP hdr + UDP hdr + payload) > 1420.
1303 		 * - IP_DF is not set in the IP hdr.
1304 		 *
1305 		 * Fallback to software checksum for these UDP datagrams.
1306 		 */
1307 		if ((m->m_pkthdr.csum_flags & M_CSUM_UDPv4) &&
1308 		    m->m_pkthdr.len > hvn_udpcs_fixup_mtu + l2hlen &&
1309 		    !(ntohs(ip->ip_off) & IP_DF)) {
1310 			uint16_t *csump;
1311 
1312 			off = l2hlen +
1313 			    M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
1314 			m = hvn_pullup_hdr(m, off + sizeof(struct udphdr));
1315 			if (m == NULL)
1316 				return NULL;
1317 
1318 			csump = (uint16_t *)(mtod(m, uint8_t *) + off +
1319 			    M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data));
1320 			*csump = cpu_in_cksum(m, m->m_pkthdr.len - off, off, 0);
1321 			m->m_pkthdr.csum_flags &= ~M_CSUM_UDPv4;
1322 		}
1323 	}
1324 #endif	/* INET */
1325 #if defined(INET) && defined(INET6)
1326 	else
1327 #endif	/* INET && INET6 */
1328 #if defined(INET6)
1329 	{
1330 		const struct ip6_hdr *ip6;
1331 
1332 		off = l2hlen + sizeof(*ip6);
1333 		m = hvn_pullup_hdr(m, off);
1334 		if (m == NULL)
1335 			return NULL;
1336 
1337 		ip6 = (struct ip6_hdr *)((mtod(m, uint8_t *)) + l2hlen);
1338 		if (ip6->ip6_nxt != IPPROTO_TCP &&
1339 		    ip6->ip6_nxt != IPPROTO_UDP) {
1340 			m_freem(m);
1341 			return NULL;
1342 		}
1343 	}
1344 #endif	/* INET6 */
1345 
1346 	*l2hlenp = l2hlen;
1347 
1348 	return m;
1349 }
1350 
1351 static int
1352 hvn_flush_txagg(struct hvn_tx_ring *txr)
1353 {
1354 	struct hvn_softc *sc = txr->txr_softc;
1355 	struct ifnet *ifp = SC2IFP(sc);
1356 	struct hvn_tx_desc *txd;
1357 	struct mbuf *m;
1358 	int error, pkts;
1359 
1360 	txd = txr->txr_agg_txd;
1361 	KASSERTMSG(txd != NULL, "no aggregate txdesc");
1362 
1363 	/*
1364 	 * Since hvn_txpkt() will reset this temporary stat, save
1365 	 * it now, so that oerrors can be updated properly, if
1366 	 * hvn_txpkt() ever fails.
1367 	 */
1368 	pkts = txr->txr_stat_pkts;
1369 
1370 	/*
1371 	 * Since txd's mbuf will _not_ be freed upon hvn_txpkt()
1372 	 * failure, save it for later freeing, if hvn_txpkt() ever
1373 	 * fails.
1374 	 */
1375 	m = txd->txd_buf;
1376 	error = hvn_txpkt(txr, txd);
1377 	if (__predict_false(error)) {
1378 		/* txd is freed, but m is not. */
1379 		m_freem(m);
1380 		txr->txr_evflushfailed.ev_count++;
1381 		if_statadd(ifp, if_oerrors, pkts);
1382 	}
1383 
1384 	/* Reset all aggregation states. */
1385 	txr->txr_agg_txd = NULL;
1386 	txr->txr_agg_szleft = 0;
1387 	txr->txr_agg_pktleft = 0;
1388 	txr->txr_agg_prevpkt = NULL;
1389 
1390 	return error;
1391 }
1392 
1393 static void *
1394 hvn_try_txagg(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd, int pktsz)
1395 {
1396 	struct hvn_softc *sc = txr->txr_softc;
1397 	struct hvn_tx_desc *agg_txd;
1398 	struct rndis_packet_msg *pkt;
1399 	void *chim;
1400 	int olen;
1401 
1402 	if (txr->txr_agg_txd != NULL) {
1403 		if (txr->txr_agg_pktleft > 0 && txr->txr_agg_szleft > pktsz) {
1404 			agg_txd = txr->txr_agg_txd;
1405 			pkt = txr->txr_agg_prevpkt;
1406 
1407 			/*
1408 			 * Update the previous RNDIS packet's total length;
1409 			 * it can grow due to the mandatory alignment
1410 			 * padding for this RNDIS packet.  Update the
1411 			 * aggregating txdesc's chimney sending buffer size
1412 			 * accordingly.
1413 			 *
1414 			 * XXX
1415 			 * Zero-out the padding, as required by the RNDIS spec.
1416 			 */
1417 			olen = pkt->rm_len;
1418 			pkt->rm_len = roundup2(olen, txr->txr_agg_align);
1419 			agg_txd->txd_chim_size += pkt->rm_len - olen;
1420 
1421 			/* Link this txdesc to the parent. */
1422 			hvn_txd_agg(agg_txd, txd);
1423 
1424 			chim = (uint8_t *)pkt + pkt->rm_len;
1425 			/* Save the current packet for later fixup. */
1426 			txr->txr_agg_prevpkt = chim;
1427 
1428 			txr->txr_agg_pktleft--;
1429 			txr->txr_agg_szleft -= pktsz;
1430 			if (txr->txr_agg_szleft <=
1431 			    HVN_PKTSIZE_MIN(txr->txr_agg_align)) {
1432 				/*
1433 				 * Probably can't aggregate more packets,
1434 				 * flush this aggregating txdesc proactively.
1435 				 */
1436 				txr->txr_agg_pktleft = 0;
1437 			}
1438 
1439 			/* Done! */
1440 			return chim;
1441 		}
1442 		hvn_flush_txagg(txr);
1443 	}
1444 
1445 	txr->txr_evchimneytried.ev_count++;
1446 	txd->txd_chim_index = hvn_chim_alloc(sc);
1447 	if (txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID)
1448 		return NULL;
1449 	txr->txr_evchimney.ev_count++;
1450 
1451 	chim = sc->sc_chim + (txd->txd_chim_index * sc->sc_chim_szmax);
1452 
1453 	if (txr->txr_agg_pktmax > 1 &&
1454 	    txr->txr_agg_szmax > pktsz + HVN_PKTSIZE_MIN(txr->txr_agg_align)) {
1455 		txr->txr_agg_txd = txd;
1456 		txr->txr_agg_pktleft = txr->txr_agg_pktmax - 1;
1457 		txr->txr_agg_szleft = txr->txr_agg_szmax - pktsz;
1458 		txr->txr_agg_prevpkt = chim;
1459 	}
1460 
1461 	return chim;
1462 }
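
/*
 * Added summary: hvn_try_txagg() appends to the current aggregate when
 * both the packet-count and size budgets allow it; otherwise it
 * flushes the aggregate, allocates a fresh chimney slot, and starts a
 * new aggregate only if the limits leave room for at least one more
 * minimum-sized packet.
 */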
1463 
1464 static int
1465 hvn_encap(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd, struct mbuf *m,
1466     int l2hlen)
1467 {
1468 	/* Used to pad ethernet frames with < ETHER_MIN_LEN bytes */
1469 	static const char zero_pad[ETHER_MIN_LEN];
1470 	struct hvn_softc *sc = txr->txr_softc;
1471 	struct rndis_packet_msg *pkt;
1472 	bus_dma_segment_t *seg;
1473 	void *chim = NULL;
1474 	size_t pktlen, pktsize;
1475 	int l3hlen;
1476 	int i, rv;
1477 
1478 	if (ISSET(sc->sc_caps, HVN_CAPS_VLAN) && !vlan_has_tag(m)) {
1479 		struct ether_vlan_header *evl;
1480 
1481 		m = hvn_pullup_hdr(m, sizeof(*evl));
1482 		if (m == NULL) {
1483 			DPRINTF("%s: failed to pullup mbuf\n",
1484 			    device_xname(sc->sc_dev));
1485 			return -1;
1486 		}
1487 
1488 		evl = mtod(m, struct ether_vlan_header *);
1489 		if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1490 			struct ether_header *eh;
1491 			uint16_t proto = evl->evl_proto;
1492 
1493 			vlan_set_tag(m, ntohs(evl->evl_tag));
1494 
1495 			/*
1496 			 * Trim VLAN tag from header.
1497 			 */
1498 			memmove((uint8_t *)evl + ETHER_VLAN_ENCAP_LEN,
1499 			    evl, ETHER_HDR_LEN);
1500 			m_adj(m, ETHER_VLAN_ENCAP_LEN);
1501 
1502 			eh = mtod(m, struct ether_header *);
1503 			eh->ether_type = proto;
1504 
1505 			/*
1506 			 * Re-padding.  See sys/net/if_vlan.c:vlan_start().
1507 			 */
1508 			if (m->m_pkthdr.len < (ETHER_MIN_LEN - ETHER_CRC_LEN +
1509 			    ETHER_VLAN_ENCAP_LEN)) {
1510 				m_copyback(m, m->m_pkthdr.len,
1511 				    (ETHER_MIN_LEN - ETHER_CRC_LEN +
1512 				     ETHER_VLAN_ENCAP_LEN) -
1513 				    m->m_pkthdr.len, zero_pad);
1514 			}
1515 
1516 			txr->txr_evvlanfixup.ev_count++;
1517 		}
1518 	}
1519 
1520 	pkt = txd->txd_req;
1521 	pktsize = HVN_PKTSIZE(m, txr->txr_agg_align);
1522 	if (pktsize < txr->txr_chim_size) {
1523 		chim = hvn_try_txagg(txr, txd, pktsize);
1524 		if (chim != NULL)
1525 			pkt = chim;
1526 	} else {
1527 		if (txr->txr_agg_txd != NULL)
1528 			hvn_flush_txagg(txr);
1529 	}
1530 
1531 	memset(pkt, 0, HVN_RNDIS_PKT_LEN);
1532 	pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
1533 	pkt->rm_len = sizeof(*pkt) + m->m_pkthdr.len;
1534 	pkt->rm_dataoffset = RNDIS_DATA_OFFSET;
1535 	pkt->rm_datalen = m->m_pkthdr.len;
1536 	pkt->rm_pktinfooffset = sizeof(*pkt); /* adjusted below */
1537 	pkt->rm_pktinfolen = 0;
1538 
1539 	if (txr->txr_flags & HVN_TXR_FLAG_UDP_HASH) {
1540 		char *cp;
1541 
1542 		/*
1543 		 * Set the hash value for this packet, so that the host can
1544 		 * dispatch the TX done event for this packet back to this TX
1545 		 * ring's channel.
1546 		 */
1547 		cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
1548 		    HVN_NDIS_HASH_VALUE_SIZE, HVN_NDIS_PKTINFO_TYPE_HASHVAL);
1549 		memcpy(cp, &txr->txr_id, HVN_NDIS_HASH_VALUE_SIZE);
1550 	}
1551 
1552 	if (vlan_has_tag(m)) {
1553 		uint32_t vlan;
1554 		char *cp;
1555 		uint16_t tag;
1556 
1557 		tag = vlan_get_tag(m);
1558 		vlan = NDIS_VLAN_INFO_MAKE(EVL_VLANOFTAG(tag),
1559 		    EVL_PRIOFTAG(tag), EVL_CFIOFTAG(tag));
1560 		cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
1561 		    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
1562 		memcpy(cp, &vlan, NDIS_VLAN_INFO_SIZE);
1563 		txr->txr_evvlanhwtagging.ev_count++;
1564 	}
1565 
1566 	if (m->m_pkthdr.csum_flags & txr->txr_csum_assist) {
1567 		uint32_t csum;
1568 		char *cp;
1569 
1570 		if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv6 | M_CSUM_UDPv6)) {
1571 			csum = NDIS_TXCSUM_INFO_IPV6;
1572 			l3hlen = M_CSUM_DATA_IPv6_IPHL(m->m_pkthdr.csum_data);
1573 			if (m->m_pkthdr.csum_flags & M_CSUM_TCPv6)
1574 				csum |= NDIS_TXCSUM_INFO_MKTCPCS(l2hlen +
1575 				    l3hlen);
1576 			if (m->m_pkthdr.csum_flags & M_CSUM_UDPv6)
1577 				csum |= NDIS_TXCSUM_INFO_MKUDPCS(l2hlen +
1578 				    l3hlen);
1579 		} else {
1580 			csum = NDIS_TXCSUM_INFO_IPV4;
1581 			l3hlen = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
1582 			if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
1583 				csum |= NDIS_TXCSUM_INFO_IPCS;
1584 			if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
1585 				csum |= NDIS_TXCSUM_INFO_MKTCPCS(l2hlen +
1586 				    l3hlen);
1587 			if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
1588 				csum |= NDIS_TXCSUM_INFO_MKUDPCS(l2hlen +
1589 				    l3hlen);
1590 		}
1591 		cp = hvn_rndis_pktinfo_append(pkt, HVN_RNDIS_PKT_LEN,
1592 		    NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
1593 		memcpy(cp, &csum, NDIS_TXCSUM_INFO_SIZE);
1594 	}
1595 
1596 	pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
1597 	pkt->rm_pktinfooffset -= RNDIS_HEADER_OFFSET;
1598 
1599 	/*
1600 	 * Fast path: Chimney sending.
1601 	 */
1602 	if (chim != NULL) {
1603 		struct hvn_tx_desc *tgt_txd;
1604 
1605 		tgt_txd = (txr->txr_agg_txd != NULL) ? txr->txr_agg_txd : txd;
1606 
1607 		KASSERTMSG(pkt == chim,
1608 		    "RNDIS pkt not in chimney sending buffer");
1609 		KASSERTMSG(tgt_txd->txd_chim_index != HVN_NVS_CHIM_IDX_INVALID,
1610 		    "chimney sending buffer is not used");
1611 
1612 		tgt_txd->txd_chim_size += pkt->rm_len;
1613 		m_copydata(m, 0, m->m_pkthdr.len, (uint8_t *)chim + pktlen);
1614 
1615 		txr->txr_sendpkt = hvn_rndis_output_chim;
1616 		goto done;
1617 	}
1618 
1619 	KASSERTMSG(txr->txr_agg_txd == NULL, "aggregating sglist txdesc");
1620 	KASSERTMSG(txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID,
1621 	    "chimney buffer is used");
1622 	KASSERTMSG(pkt == txd->txd_req, "RNDIS pkt not in txdesc");
1623 
1624 	rv = bus_dmamap_load_mbuf(sc->sc_dmat, txd->txd_dmap, m, BUS_DMA_READ |
1625 	    BUS_DMA_NOWAIT);
1626 	switch (rv) {
1627 	case 0:
1628 		break;
1629 	case EFBIG:
1630 		if (m_defrag(m, M_NOWAIT) != NULL) {
1631 			txr->txr_evdefrag.ev_count++;
1632 			if (bus_dmamap_load_mbuf(sc->sc_dmat, txd->txd_dmap, m,
1633 			    BUS_DMA_READ | BUS_DMA_NOWAIT) == 0)
1634 				break;
1635 		}
1636 		/* FALLTHROUGH */
1637 	default:
1638 		DPRINTF("%s: failed to load mbuf\n", device_xname(sc->sc_dev));
1639 		txr->txr_evdmafailed.ev_count++;
1640 		return -1;
1641 	}
1642 	bus_dmamap_sync(sc->sc_dmat, txd->txd_dmap,
1643 	    0, txd->txd_dmap->dm_mapsize, BUS_DMASYNC_PREWRITE);
1644 	SET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP);
1645 
1646 	/* Attach an RNDIS message to the first slot */
1647 	txd->txd_sgl[0].gpa_page = txd->txd_gpa.gpa_page;
1648 	txd->txd_sgl[0].gpa_ofs = txd->txd_gpa.gpa_ofs;
1649 	txd->txd_sgl[0].gpa_len = pktlen;
1650 	txd->txd_nsge = txd->txd_dmap->dm_nsegs + 1;
1651 
1652 	for (i = 0; i < txd->txd_dmap->dm_nsegs; i++) {
1653 		seg = &txd->txd_dmap->dm_segs[i];
1654 		txd->txd_sgl[1 + i].gpa_page = atop(seg->ds_addr);
1655 		txd->txd_sgl[1 + i].gpa_ofs = seg->ds_addr & PAGE_MASK;
1656 		txd->txd_sgl[1 + i].gpa_len = seg->ds_len;
1657 	}
1658 
1659 	txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
1660 	txd->txd_chim_size = 0;
1661 	txr->txr_sendpkt = hvn_rndis_output_sgl;
1662 done:
1663 	txd->txd_buf = m;
1664 
1665 	/* Update temporary stats for later use. */
1666 	txr->txr_stat_pkts++;
1667 	txr->txr_stat_size += m->m_pkthdr.len;
1668 	if (m->m_flags & M_MCAST)
1669 		txr->txr_stat_mcasts++;
1670 
1671 	return 0;
1672 }
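
/*
 * Added summary: hvn_encap() leaves the descriptor on one of two
 * output paths.  Packets that fit the chimney budget are copied into
 * the shared chimney buffer and sent via hvn_rndis_output_chim();
 * larger packets are DMA-mapped and sent as a gather list, with the
 * RNDIS header in sgl slot 0, via hvn_rndis_output_sgl().
 */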
1673 
1674 static void
1675 hvn_bpf_mtap(struct hvn_tx_ring *txr, struct mbuf *m, u_int direction)
1676 {
1677 	struct hvn_softc *sc = txr->txr_softc;
1678 	struct ifnet *ifp = SC2IFP(sc);
1679 	struct ether_header *eh;
1680 	struct ether_vlan_header evl;
1681 
1682 	if (!vlan_has_tag(m)) {
1683 		bpf_mtap(ifp, m, direction);
1684 		return;
1685 	}
1686 
1687 	if (ifp->if_bpf == NULL)
1688 		return;
1689 
1690 	txr->txr_evvlantap.ev_count++;
1691 
1692 	/*
1693 	 * Restore a VLAN tag for bpf.
1694 	 *
1695 	 * Do not modify contents of the original mbuf,
1696 	 * because Tx processing on the mbuf is still in progress.
1697 	 */
1698 
1699 	eh = mtod(m, struct ether_header *);
1700 	memcpy(evl.evl_dhost, eh->ether_dhost, ETHER_ADDR_LEN * 2);
1701 	evl.evl_encap_proto = htons(ETHERTYPE_VLAN);
1702 	evl.evl_tag = htons(vlan_get_tag(m));
1703 	evl.evl_proto = eh->ether_type;
1704 
1705 	/* Do not tap ether header of the original mbuf. */
1706 	m_adj(m, sizeof(*eh));
1707 
1708 	bpf_mtap2(ifp->if_bpf, &evl, sizeof(evl), m, direction);
1709 
1710 	/* We cannot restore the Ethernet header of the original mbuf,
1711 	 * but that is harmless: the caller frees the mbuf anyway. */
1712 }
1713 
1714 static int
1715 hvn_txpkt(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
1716 {
1717 	struct hvn_softc *sc = txr->txr_softc;
1718 	struct ifnet *ifp = SC2IFP(sc);
1719 	const struct hvn_tx_desc *tmp_txd;
1720 	int error;
1721 
1722 	/*
1723 	 * Make sure that this txd and any aggregated txds are not
1724 	 * freed before bpf_mtap.
1725 	 */
1726 	hvn_txd_hold(txd);
1727 
1728 	error = (*txr->txr_sendpkt)(txr, txd);
1729 	if (error == 0) {
1730 		hvn_bpf_mtap(txr, txd->txd_buf, BPF_D_OUT);
1731 		STAILQ_FOREACH(tmp_txd, &txd->txd_agg_list, txd_agg_entry)
1732 			hvn_bpf_mtap(txr, tmp_txd->txd_buf, BPF_D_OUT);
1733 
1734 		if_statadd(ifp, if_opackets, txr->txr_stat_pkts);
1735 		if_statadd(ifp, if_obytes, txr->txr_stat_size);
1736 		if (txr->txr_stat_mcasts != 0)
1737 			if_statadd(ifp, if_omcasts, txr->txr_stat_mcasts);
1738 		txr->txr_evpkts.ev_count += txr->txr_stat_pkts;
1739 		txr->txr_evsends.ev_count++;
1740 	}
1741 
1742 	hvn_txd_put(txr, txd);
1743 
1744 	if (__predict_false(error)) {
1745 		/*
1746 		 * Caller will perform further processing on the
1747 		 * associated mbuf, so don't free it in hvn_txd_put();
1748 		 * only unload it from the DMA map in hvn_txd_put(),
1749 		 * if it was loaded.
1750 		 */
1751 		txd->txd_buf = NULL;
1752 		hvn_txd_put(txr, txd);
1753 	}
1754 
1755 	/* Reset temporary stats, now that this send is done. */
1756 	txr->txr_stat_pkts = 0;
1757 	txr->txr_stat_size = 0;
1758 	txr->txr_stat_mcasts = 0;
1759 
1760 	return error;
1761 }
1762 
1763 static void
1764 hvn_txeof(struct hvn_tx_ring *txr, uint64_t tid)
1765 {
1766 	struct hvn_softc *sc = txr->txr_softc;
1767 	struct hvn_tx_desc *txd;
1768 	uint32_t id = tid >> 32;
1769 
1770 	if ((tid & 0xffffffffU) != 0)
1771 		return;
1772 
1773 	id -= HVN_NVS_CHIM_SIG;
1774 	if (id >= HVN_TX_DESC) {
1775 		device_printf(sc->sc_dev, "tx packet index too large: %u\n", id);
1776 		return;
1777 	}
1778 
1779 	txd = &txr->txr_desc[id];
1780 
1781 	if (txd->txd_buf == NULL)
1782 		device_printf(sc->sc_dev, "no mbuf @%u\n", id);
1783 
1784 	hvn_txd_put(txr, txd);
1785 }
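
/*
 * Added note: as the checks above show, the transaction id assigned by
 * this driver carries the tx descriptor index, biased by
 * HVN_NVS_CHIM_SIG, in its upper 32 bits with the low 32 bits zero;
 * completions that do not match this encoding are ignored.
 */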
1786 
1787 static int
1788 hvn_rx_ring_create(struct hvn_softc *sc, int ring_cnt)
1789 {
1790 	struct hvn_rx_ring *rxr;
1791 	int i;
1792 
1793 	if (sc->sc_proto <= HVN_NVS_PROTO_VERSION_2)
1794 		sc->sc_rx_size = 15 * 1024 * 1024;	/* 15MB */
1795 	else
1796 		sc->sc_rx_size = 16 * 1024 * 1024;	/* 16MB */
1797 	sc->sc_rx_ring = hyperv_dma_alloc(sc->sc_dmat, &sc->sc_rx_dma,
1798 	    sc->sc_rx_size, PAGE_SIZE, PAGE_SIZE, sc->sc_rx_size / PAGE_SIZE);
1799 	if (sc->sc_rx_ring == NULL) {
1800 		DPRINTF("%s: failed to allocate Rx ring buffer\n",
1801 		    device_xname(sc->sc_dev));
1802 		return -1;
1803 	}
1804 
1805 	sc->sc_rxr = kmem_zalloc(sizeof(*rxr) * ring_cnt, KM_SLEEP);
1806 	sc->sc_nrxr_inuse = sc->sc_nrxr = ring_cnt;
1807 
1808 	for (i = 0; i < sc->sc_nrxr; i++) {
1809 		rxr = &sc->sc_rxr[i];
1810 		rxr->rxr_softc = sc;
1811 		if (i < sc->sc_ntxr) {
1812 			rxr->rxr_txr = &sc->sc_txr[i];
1813 			rxr->rxr_txr->txr_rxr = rxr;
1814 		}
1815 
1816 		mutex_init(&rxr->rxr_lock, MUTEX_DEFAULT, IPL_NET);
1817 		mutex_init(&rxr->rxr_onwork_lock, MUTEX_DEFAULT, IPL_NET);
1818 		cv_init(&rxr->rxr_onwork_cv, "waitonwk");
1819 
1820 		snprintf(rxr->rxr_name, sizeof(rxr->rxr_name),
1821 		    "%s-rx%d", device_xname(sc->sc_dev), i);
1822 		evcnt_attach_dynamic(&rxr->rxr_evpkts, EVCNT_TYPE_MISC,
1823 		    NULL, rxr->rxr_name, "packets received");
1824 		evcnt_attach_dynamic(&rxr->rxr_evcsum_ip, EVCNT_TYPE_MISC,
1825 		    NULL, rxr->rxr_name, "IP checksum");
1826 		evcnt_attach_dynamic(&rxr->rxr_evcsum_tcp, EVCNT_TYPE_MISC,
1827 		    NULL, rxr->rxr_name, "TCP checksum");
1828 		evcnt_attach_dynamic(&rxr->rxr_evcsum_udp, EVCNT_TYPE_MISC,
1829 		    NULL, rxr->rxr_name, "UDP checksum");
1830 		evcnt_attach_dynamic(&rxr->rxr_evvlanhwtagging, EVCNT_TYPE_MISC,
1831 		    NULL, rxr->rxr_name, "VLAN H/W tagging");
1832 		evcnt_attach_dynamic(&rxr->rxr_evintr, EVCNT_TYPE_INTR,
1833 		    NULL, rxr->rxr_name, "interrupt on ring");
1834 		evcnt_attach_dynamic(&rxr->rxr_evdefer, EVCNT_TYPE_MISC,
1835 		    NULL, rxr->rxr_name, "handled queue in workqueue");
1836 		evcnt_attach_dynamic(&rxr->rxr_evdeferreq, EVCNT_TYPE_MISC,
1837 		    NULL, rxr->rxr_name, "requested defer on ring");
1838 		evcnt_attach_dynamic(&rxr->rxr_evredeferreq, EVCNT_TYPE_MISC,
1839 		    NULL, rxr->rxr_name, "requested defer in workqueue");
1840 
1841 		rxr->rxr_nvsbuf = kmem_zalloc(HVN_NVS_BUFSIZE, KM_SLEEP);
1842 		if (rxr->rxr_nvsbuf == NULL) {
1843 			DPRINTF("%s: failed to allocate channel data buffer\n",
1844 			    device_xname(sc->sc_dev));
1845 			goto errout;
1846 		}
1847 
1848 		rxr->rxr_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1849 		    hvn_nvs_softintr, rxr);
1850 		if (rxr->rxr_si == NULL) {
1851 			DPRINTF("%s: failed to establish rx softint\n",
1852 			    device_xname(sc->sc_dev));
1853 			goto errout;
1854 		}
1855 	}
1856 
1857 	return 0;
1858 
1859  errout:
1860 	hvn_rx_ring_destroy(sc);
1861 	return -1;
1862 }
1863 
1864 static int
1865 hvn_rx_ring_destroy(struct hvn_softc *sc)
1866 {
1867 	struct hvn_rx_ring *rxr;
1868 	int i;
1869 
1870 	if (sc->sc_rxr != NULL) {
1871 		for (i = 0; i < sc->sc_nrxr; i++) {
1872 			rxr = &sc->sc_rxr[i];
1873 
1874 			if (rxr->rxr_si != NULL) {
1875 				softint_disestablish(rxr->rxr_si);
1876 				rxr->rxr_si = NULL;
1877 			}
1878 
1879 			if (rxr->rxr_nvsbuf != NULL) {
1880 				kmem_free(rxr->rxr_nvsbuf, HVN_NVS_BUFSIZE);
1881 				rxr->rxr_nvsbuf = NULL;
1882 			}
1883 
1884 			evcnt_detach(&rxr->rxr_evpkts);
1885 			evcnt_detach(&rxr->rxr_evcsum_ip);
1886 			evcnt_detach(&rxr->rxr_evcsum_tcp);
1887 			evcnt_detach(&rxr->rxr_evcsum_udp);
1888 			evcnt_detach(&rxr->rxr_evvlanhwtagging);
1889 			evcnt_detach(&rxr->rxr_evintr);
1890 			evcnt_detach(&rxr->rxr_evdefer);
1891 			evcnt_detach(&rxr->rxr_evdeferreq);
1892 			evcnt_detach(&rxr->rxr_evredeferreq);
1893 
1894 			cv_destroy(&rxr->rxr_onwork_cv);
1895 			mutex_destroy(&rxr->rxr_onwork_lock);
1896 			mutex_destroy(&rxr->rxr_lock);
1897 		}
1898 		kmem_free(sc->sc_rxr, sizeof(*rxr) * sc->sc_nrxr);
1899 		sc->sc_rxr = NULL;
1900 		sc->sc_nrxr = 0;
1901 	}
1902 	if (sc->sc_rx_ring != NULL) {
1903 		hyperv_dma_free(sc->sc_dmat, &sc->sc_rx_dma);
1904 		sc->sc_rx_ring = NULL;
1905 	}
1906 
1907 	return 0;
1908 }
1909 
1910 static void
1911 hvn_fixup_rx_data(struct hvn_softc *sc)
1912 {
1913 	struct hvn_rx_ring *rxr;
1914 	int i;
1915 
1916 	if (sc->sc_caps & HVN_CAPS_UDPHASH) {
1917 		for (i = 0; i < sc->sc_nrxr; i++) {
1918 			rxr = &sc->sc_rxr[i];
1919 			rxr->rxr_flags |= HVN_RXR_FLAG_UDP_HASH;
1920 		}
1921 	}
1922 }
1923 
1924 static int
1925 hvn_tx_ring_create(struct hvn_softc *sc, int ring_cnt)
1926 {
1927 	struct hvn_tx_ring *txr;
1928 	struct hvn_tx_desc *txd;
1929 	bus_dma_segment_t *seg;
1930 	size_t msgsize;
1931 	int i, j;
1932 	paddr_t pa;
1933 
1934 	/*
1935 	 * Create TXBUF for chimney sending.
1936 	 *
1937 	 * NOTE: It is shared by all channels.
1938 	 */
1939 	sc->sc_chim = hyperv_dma_alloc(sc->sc_dmat, &sc->sc_chim_dma,
1940 	    HVN_CHIM_SIZE, PAGE_SIZE, 0, 1);
1941 	if (sc->sc_chim == NULL) {
1942 		DPRINTF("%s: failed to allocate chimney sending memory",
1943 		    device_xname(sc->sc_dev));
1944 		goto errout;
1945 	}
1946 
1947 	sc->sc_txr = kmem_zalloc(sizeof(*txr) * ring_cnt, KM_SLEEP);
1948 	sc->sc_ntxr_inuse = sc->sc_ntxr = ring_cnt;
1949 
1950 	msgsize = roundup(HVN_RNDIS_PKT_LEN, 128);
1951 
1952 	for (j = 0; j < ring_cnt; j++) {
1953 		txr = &sc->sc_txr[j];
1954 		txr->txr_softc = sc;
1955 		txr->txr_id = j;
1956 
1957 		mutex_init(&txr->txr_lock, MUTEX_DEFAULT, IPL_NET);
1958 		txr->txr_interq = pcq_create(HVN_TX_DESC, KM_SLEEP);
1959 
1960 		snprintf(txr->txr_name, sizeof(txr->txr_name),
1961 		    "%s-tx%d", device_xname(sc->sc_dev), j);
1962 		evcnt_attach_dynamic(&txr->txr_evpkts, EVCNT_TYPE_MISC,
1963 		    NULL, txr->txr_name, "packets transmitted");
1964 		evcnt_attach_dynamic(&txr->txr_evsends, EVCNT_TYPE_MISC,
1965 		    NULL, txr->txr_name, "sends");
1966 		evcnt_attach_dynamic(&txr->txr_evnodesc, EVCNT_TYPE_MISC,
1967 		    NULL, txr->txr_name, "descriptor shortage");
1968 		evcnt_attach_dynamic(&txr->txr_evdmafailed, EVCNT_TYPE_MISC,
1969 		    NULL, txr->txr_name, "DMA failure");
1970 		evcnt_attach_dynamic(&txr->txr_evdefrag, EVCNT_TYPE_MISC,
1971 		    NULL, txr->txr_name, "mbuf defragged");
1972 		evcnt_attach_dynamic(&txr->txr_evpcqdrop, EVCNT_TYPE_MISC,
1973 		    NULL, txr->txr_name, "dropped in pcq");
1974 		evcnt_attach_dynamic(&txr->txr_evtransmitdefer, EVCNT_TYPE_MISC,
1975 		    NULL, txr->txr_name, "deferred transmit");
1976 		evcnt_attach_dynamic(&txr->txr_evflushfailed, EVCNT_TYPE_MISC,
1977 		    NULL, txr->txr_name, "aggregation flush failure");
1978 		evcnt_attach_dynamic(&txr->txr_evchimneytried, EVCNT_TYPE_MISC,
1979 		    NULL, txr->txr_name, "chimney send tried");
1980 		evcnt_attach_dynamic(&txr->txr_evchimney, EVCNT_TYPE_MISC,
1981 		    NULL, txr->txr_name, "chimney send");
1982 		evcnt_attach_dynamic(&txr->txr_evvlanfixup, EVCNT_TYPE_MISC,
1983 		    NULL, txr->txr_name, "VLAN fixup");
1984 		evcnt_attach_dynamic(&txr->txr_evvlanhwtagging, EVCNT_TYPE_MISC,
1985 		    NULL, txr->txr_name, "VLAN H/W tagging");
1986 		evcnt_attach_dynamic(&txr->txr_evvlantap, EVCNT_TYPE_MISC,
1987 		    NULL, txr->txr_name, "VLAN bpf_mtap fixup");
1988 
1989 		txr->txr_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1990 		    hvn_deferred_transmit, txr);
1991 		if (txr->txr_si == NULL) {
1992 			aprint_error_dev(sc->sc_dev,
1993 			    "failed to establish softint for tx ring\n");
1994 			goto errout;
1995 		}
1996 
1997 		/* Allocate memory to store RNDIS messages */
1998 		txr->txr_msgs = hyperv_dma_alloc(sc->sc_dmat, &txr->txr_dma,
1999 		    msgsize * HVN_TX_DESC, PAGE_SIZE, 0, 1);
2000 		if (txr->txr_msgs == NULL) {
2001 			DPRINTF("%s: failed to allocate memory for RNDIS "
2002 			    "messages\n", device_xname(sc->sc_dev));
2003 			goto errout;
2004 		}
2005 
2006 		TAILQ_INIT(&txr->txr_list);
2007 		for (i = 0; i < HVN_TX_DESC; i++) {
2008 			txd = &txr->txr_desc[i];
2009 			txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
2010 			txd->txd_chim_size = 0;
2011 			STAILQ_INIT(&txd->txd_agg_list);
2012 			if (bus_dmamap_create(sc->sc_dmat, HVN_TX_PKT_SIZE,
2013 			    HVN_TX_FRAGS, HVN_TX_FRAG_SIZE, PAGE_SIZE,
2014 			    BUS_DMA_WAITOK, &txd->txd_dmap)) {
2015 				DPRINTF("%s: failed to create map for TX "
2016 				    "descriptors\n", device_xname(sc->sc_dev));
2017 				goto errout;
2018 			}
2019 			seg = &txr->txr_dma.map->dm_segs[0];
2020 			pa = seg->ds_addr + (msgsize * i);
2021 			txd->txd_gpa.gpa_page = atop(pa);
2022 			txd->txd_gpa.gpa_ofs = pa & PAGE_MASK;
2023 			txd->txd_gpa.gpa_len = msgsize;
2024 			txd->txd_req = (void *)(txr->txr_msgs + (msgsize * i));
2025 			txd->txd_id = i + HVN_NVS_CHIM_SIG;
2026 			TAILQ_INSERT_TAIL(&txr->txr_list, txd, txd_entry);
2027 		}
2028 		txr->txr_avail = HVN_TX_DESC;
2029 	}
2030 
2031 	return 0;
2032 
2033  errout:
2034 	hvn_tx_ring_destroy(sc);
2035 	return -1;
2036 }
2037 
2038 static void
2039 hvn_tx_ring_destroy(struct hvn_softc *sc)
2040 {
2041 	struct hvn_tx_ring *txr;
2042 	struct hvn_tx_desc *txd;
2043 	int i, j;
2044 
2045 	if (sc->sc_txr != NULL) {
2046 		for (j = 0; j < sc->sc_ntxr; j++) {
2047 			txr = &sc->sc_txr[j];
2048 
2049 			mutex_enter(&txr->txr_lock);
2050 			for (i = 0; i < HVN_TX_DESC; i++) {
2051 				txd = &txr->txr_desc[i];
2052 				hvn_txd_gc(txr, txd);
2053 			}
2054 			mutex_exit(&txr->txr_lock);
2055 			for (i = 0; i < HVN_TX_DESC; i++) {
2056 				txd = &txr->txr_desc[i];
2057 				if (txd->txd_dmap != NULL) {
2058 					bus_dmamap_destroy(sc->sc_dmat,
2059 					    txd->txd_dmap);
2060 					txd->txd_dmap = NULL;
2061 				}
2062 			}
2063 			if (txr->txr_msgs != NULL) {
2064 				hyperv_dma_free(sc->sc_dmat, &txr->txr_dma);
2065 				txr->txr_msgs = NULL;
2066 			}
2067 			if (txr->txr_si != NULL) {
2068 				softint_disestablish(txr->txr_si);
2069 				txr->txr_si = NULL;
2070 			}
2071 			if (txr->txr_interq != NULL) {
2072 				hvn_tx_ring_qflush(sc, txr);
2073 				pcq_destroy(txr->txr_interq);
2074 				txr->txr_interq = NULL;
2075 			}
2076 
2077 			evcnt_detach(&txr->txr_evpkts);
2078 			evcnt_detach(&txr->txr_evsends);
2079 			evcnt_detach(&txr->txr_evnodesc);
2080 			evcnt_detach(&txr->txr_evdmafailed);
2081 			evcnt_detach(&txr->txr_evdefrag);
2082 			evcnt_detach(&txr->txr_evpcqdrop);
2083 			evcnt_detach(&txr->txr_evtransmitdefer);
2084 			evcnt_detach(&txr->txr_evflushfailed);
2085 			evcnt_detach(&txr->txr_evchimneytried);
2086 			evcnt_detach(&txr->txr_evchimney);
2087 			evcnt_detach(&txr->txr_evvlanfixup);
2088 			evcnt_detach(&txr->txr_evvlanhwtagging);
2089 			evcnt_detach(&txr->txr_evvlantap);
2090 
2091 			mutex_destroy(&txr->txr_lock);
2092 		}
2093 
2094 		kmem_free(sc->sc_txr, sizeof(*txr) * sc->sc_ntxr);
2095 		sc->sc_txr = NULL;
2096 	}
2097 
2098 	if (sc->sc_chim != NULL) {
2099 		hyperv_dma_free(sc->sc_dmat, &sc->sc_chim_dma);
2100 		sc->sc_chim = NULL;
2101 	}
2102 }
2103 
2104 static void
2105 hvn_set_chim_size(struct hvn_softc *sc, int chim_size)
2106 {
2107 	struct hvn_tx_ring *txr;
2108 	int i;
2109 
2110 	for (i = 0; i < sc->sc_ntxr_inuse; i++) {
2111 		txr = &sc->sc_txr[i];
2112 		txr->txr_chim_size = chim_size;
2113 	}
2114 }
2115 
2116 #if LONG_BIT == 64
2117 #define ffsl(v)	ffs64(v)
2118 #elif LONG_BIT == 32
2119 #define ffsl(v)	ffs32(v)
2120 #else
2121 #error unsupported LONG_BIT
2122 #endif  /* LONG_BIT */
2123 
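/*
 * The chimney sending buffer is carved into sc_chim_szmax-byte
 * sections whose allocation state is kept in a bitmap of
 * sc_chim_bmap_cnt longs.  Allocation scans for the first clear
 * bit; for example, with LONG_BIT == 64 and sc_chim_bmap[0] == 0x3,
 * ffsl(~0x3) returns 3, so bit 2 is set and chim_idx 2 is handed
 * out.  HVN_NVS_CHIM_IDX_INVALID is returned when every section is
 * busy.
 */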
2124 static uint32_t
2125 hvn_chim_alloc(struct hvn_softc *sc)
2126 {
2127 	uint32_t chim_idx = HVN_NVS_CHIM_IDX_INVALID;
2128 	int i, idx;
2129 
2130 	mutex_spin_enter(&sc->sc_chim_bmap_lock);
2131 	for (i = 0; i < sc->sc_chim_bmap_cnt; i++) {
2132 		idx = ffsl(~sc->sc_chim_bmap[i]);
2133 		if (idx == 0)
2134 			continue;
2135 
2136 		--idx;	/* ffsl is 1-based */
2137 		SET(sc->sc_chim_bmap[i], __BIT(idx));
2138 
2139 		chim_idx = i * LONG_BIT + idx;
2140 		break;
2141 	}
2142 	mutex_spin_exit(&sc->sc_chim_bmap_lock);
2143 
2144 	return chim_idx;
2145 }
2146 
2147 static void
2148 hvn_chim_free(struct hvn_softc *sc, uint32_t chim_idx)
2149 {
2150 	u_long mask;
2151 	uint32_t idx;
2152 
2153 	idx = chim_idx / LONG_BIT;
2154 	mask = __BIT(chim_idx % LONG_BIT);
2155 
2156 	mutex_spin_enter(&sc->sc_chim_bmap_lock);
2157 	CLR(sc->sc_chim_bmap[idx], mask);
2158 	mutex_spin_exit(&sc->sc_chim_bmap_lock);
2159 }
2160 
2161 static void
2162 hvn_fixup_tx_data(struct hvn_softc *sc)
2163 {
2164 	struct hvn_tx_ring *txr;
2165 	uint64_t caps_assist;
2166 	int csum_assist;
2167 	int i;
2168 
2169 	hvn_set_chim_size(sc, sc->sc_chim_szmax);
2170 	if (hvn_tx_chimney_size > 0 && hvn_tx_chimney_size < sc->sc_chim_szmax)
2171 		hvn_set_chim_size(sc, hvn_tx_chimney_size);
2172 
2173 	caps_assist = 0;
2174 	csum_assist = 0;
2175 	if (sc->sc_caps & HVN_CAPS_IPCS) {
2176 		caps_assist |= IFCAP_CSUM_IPv4_Tx;
2177 		caps_assist |= IFCAP_CSUM_IPv4_Rx;
2178 		csum_assist |= M_CSUM_IPv4;
2179 	}
2180 	if (sc->sc_caps & HVN_CAPS_TCP4CS) {
2181 		caps_assist |= IFCAP_CSUM_TCPv4_Tx;
2182 		caps_assist |= IFCAP_CSUM_TCPv4_Rx;
2183 		csum_assist |= M_CSUM_TCPv4;
2184 	}
2185 	if (sc->sc_caps & HVN_CAPS_TCP6CS) {
2186 		caps_assist |= IFCAP_CSUM_TCPv6_Tx;
2187 		csum_assist |= M_CSUM_TCPv6;
2188 	}
2189 	if (sc->sc_caps & HVN_CAPS_UDP4CS) {
2190 		caps_assist |= IFCAP_CSUM_UDPv4_Tx;
2191 		caps_assist |= IFCAP_CSUM_UDPv4_Rx;
2192 		csum_assist |= M_CSUM_UDPv4;
2193 	}
2194 	if (sc->sc_caps & HVN_CAPS_UDP6CS) {
2195 		caps_assist |= IFCAP_CSUM_UDPv6_Tx;
2196 		csum_assist |= M_CSUM_UDPv6;
2197 	}
2198 	for (i = 0; i < sc->sc_ntxr; i++) {
2199 		txr = &sc->sc_txr[i];
2200 		txr->txr_caps_assist = caps_assist;
2201 		txr->txr_csum_assist = csum_assist;
2202 	}
2203 
2204 	if (sc->sc_caps & HVN_CAPS_UDPHASH) {
2205 		for (i = 0; i < sc->sc_ntxr; i++) {
2206 			txr = &sc->sc_txr[i];
2207 			txr->txr_flags |= HVN_TXR_FLAG_UDP_HASH;
2208 		}
2209 	}
2210 }
2211 
2212 static int
2213 hvn_txd_peek(struct hvn_tx_ring *txr)
2214 {
2215 
2216 	KASSERT(mutex_owned(&txr->txr_lock));
2217 
2218 	return txr->txr_avail;
2219 }
2220 
2221 static struct hvn_tx_desc *
2222 hvn_txd_get(struct hvn_tx_ring *txr)
2223 {
2224 	struct hvn_tx_desc *txd;
2225 
2226 	KASSERT(mutex_owned(&txr->txr_lock));
2227 
2228 	txd = TAILQ_FIRST(&txr->txr_list);
2229 	KASSERT(txd != NULL);
2230 	TAILQ_REMOVE(&txr->txr_list, txd, txd_entry);
2231 	txr->txr_avail--;
2232 
2233 	txd->txd_refs = 1;
2234 
2235 	return txd;
2236 }
2237 
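/*
 * Drop one reference on a tx descriptor.  On the final put: release
 * any descriptors aggregated onto this one first, then return the
 * chimney sending section or unload the DMA map (the two are mutually
 * exclusive), free the mbuf, and put the descriptor back on the
 * ring's free list.
 */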
2238 static void
2239 hvn_txd_put(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
2240 {
2241 	struct hvn_softc *sc = txr->txr_softc;
2242 	struct hvn_tx_desc *tmp_txd;
2243 
2244 	KASSERT(mutex_owned(&txr->txr_lock));
2245 	KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2246 	    "put an onagg txd %#x", txd->txd_flags);
2247 
2248 	KASSERTMSG(txd->txd_refs > 0, "invalid txd refs %d", txd->txd_refs);
2249 	if (atomic_dec_uint_nv(&txd->txd_refs) != 0)
2250 		return;
2251 
2252 	if (!STAILQ_EMPTY(&txd->txd_agg_list)) {
2253 		while ((tmp_txd = STAILQ_FIRST(&txd->txd_agg_list)) != NULL) {
2254 			KASSERTMSG(STAILQ_EMPTY(&tmp_txd->txd_agg_list),
2255 			    "recursive aggregation on aggregated txdesc");
2256 			KASSERTMSG(
2257 			    ISSET(tmp_txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2258 			    "not aggregated txdesc");
2259 			KASSERTMSG(
2260 			    tmp_txd->txd_chim_index == HVN_NVS_CHIM_IDX_INVALID,
2261 			    "aggregated txdesc consumes chimney sending "
2262 			    "buffer: idx %u", tmp_txd->txd_chim_index);
2263 			KASSERTMSG(tmp_txd->txd_chim_size == 0,
2264 			    "aggregated txdesc has non-zero chimney sending "
2265 			    "size: sz %u", tmp_txd->txd_chim_size);
2266 
2267 			STAILQ_REMOVE_HEAD(&txd->txd_agg_list, txd_agg_entry);
2268 			CLR(tmp_txd->txd_flags, HVN_TXD_FLAG_ONAGG);
2269 			hvn_txd_put(txr, tmp_txd);
2270 		}
2271 	}
2272 
2273 	if (txd->txd_chim_index != HVN_NVS_CHIM_IDX_INVALID) {
2274 		KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP),
2275 		    "chim txd uses dmamap");
2276 		hvn_chim_free(sc, txd->txd_chim_index);
2277 		txd->txd_chim_index = HVN_NVS_CHIM_IDX_INVALID;
2278 		txd->txd_chim_size = 0;
2279 	} else if (ISSET(txd->txd_flags, HVN_TXD_FLAG_DMAMAP)) {
2280 		bus_dmamap_sync(sc->sc_dmat, txd->txd_dmap,
2281 		    0, txd->txd_dmap->dm_mapsize,
2282 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2283 		bus_dmamap_unload(sc->sc_dmat, txd->txd_dmap);
2284 		CLR(txd->txd_flags, HVN_TXD_FLAG_DMAMAP);
2285 	}
2286 
2287 	m_freem(txd->txd_buf);
2288 	txd->txd_buf = NULL;
2289 
2290 	TAILQ_INSERT_TAIL(&txr->txr_list, txd, txd_entry);
2291 	txr->txr_avail++;
2292 	txr->txr_oactive = 0;
2293 }
2294 
2295 static void
2296 hvn_txd_gc(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
2297 {
2298 
2299 	KASSERTMSG(txd->txd_refs == 0 || txd->txd_refs == 1,
2300 	    "invalid txd refs %d", txd->txd_refs);
2301 
2302 	/* Aggregated txds will be freed by their aggregating txd. */
2303 	if (txd->txd_refs > 0 && !ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG))
2304 		hvn_txd_put(txr, txd);
2305 }
2306 
2307 static void
2308 hvn_txd_hold(struct hvn_tx_desc *txd)
2309 {
2310 
2311 	/* 0->1 transition will never work */
2312 	KASSERTMSG(txd->txd_refs > 0, "invalid txd refs %d", txd->txd_refs);
2313 
2314 	atomic_inc_uint(&txd->txd_refs);
2315 }
2316 
2317 static void
2318 hvn_txd_agg(struct hvn_tx_desc *agg_txd, struct hvn_tx_desc *txd)
2319 {
2320 
2321 	KASSERTMSG(!ISSET(agg_txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2322 	    "recursive aggregation on aggregating txdesc");
2323 	KASSERTMSG(!ISSET(txd->txd_flags, HVN_TXD_FLAG_ONAGG),
2324 	    "already aggregated");
2325 	KASSERTMSG(STAILQ_EMPTY(&txd->txd_agg_list),
2326 	    "recursive aggregation on to-be-aggregated txdesc");
2327 
2328 	SET(txd->txd_flags, HVN_TXD_FLAG_ONAGG);
2329 	STAILQ_INSERT_TAIL(&agg_txd->txd_agg_list, txd, txd_agg_entry);
2330 }
2331 
2332 static int
2333 hvn_tx_ring_pending(struct hvn_tx_ring *txr)
2334 {
2335 	int pending = 0;
2336 
2337 	mutex_enter(&txr->txr_lock);
2338 	if (hvn_txd_peek(txr) != HVN_TX_DESC)
2339 		pending = 1;
2340 	mutex_exit(&txr->txr_lock);
2341 
2342 	return pending;
2343 }
2344 
2345 static void
2346 hvn_tx_ring_qflush(struct hvn_softc *sc, struct hvn_tx_ring *txr)
2347 {
2348 	struct mbuf *m;
2349 
2350 	while ((m = pcq_get(txr->txr_interq)) != NULL)
2351 		m_freem(m);
2352 }
2353 
2354 static int
2355 hvn_get_lladdr(struct hvn_softc *sc, uint8_t *enaddr)
2356 {
2357 	size_t addrlen = ETHER_ADDR_LEN;
2358 	int rv;
2359 
2360 	rv = hvn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, enaddr, &addrlen);
2361 	if (rv == 0 && addrlen != ETHER_ADDR_LEN)
2362 		rv = -1;
2363 	return rv;
2364 }
2365 
2366 static void
2367 hvn_update_link_status(struct hvn_softc *sc)
2368 {
2369 	struct ifnet *ifp = SC2IFP(sc);
2370 	uint32_t state, old_link_state;
2371 	size_t len = sizeof(state);
2372 	int rv;
2373 
2374 	rv = hvn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, &state, &len);
2375 	if (rv != 0 || len != sizeof(state))
2376 		return;
2377 
2378 	old_link_state = sc->sc_link_state;
2379 	sc->sc_link_state = (state == NDIS_MEDIA_STATE_CONNECTED) ?
2380 	    LINK_STATE_UP : LINK_STATE_DOWN;
2381 	if (old_link_state != sc->sc_link_state) {
2382 		if_link_state_change(ifp, sc->sc_link_state);
2383 	}
2384 }
2385 
2386 static int
2387 hvn_get_mtu(struct hvn_softc *sc, uint32_t *mtu)
2388 {
2389 	size_t mtusz = sizeof(*mtu);
2390 	int rv;
2391 
2392 	rv = hvn_rndis_query(sc, OID_GEN_MAXIMUM_FRAME_SIZE, mtu, &mtusz);
2393 	if (rv == 0 && mtusz != sizeof(*mtu))
2394 		rv = -1;
2395 	return rv;
2396 }
2397 
2398 static int
2399 hvn_channel_attach(struct hvn_softc *sc, struct vmbus_channel *chan)
2400 {
2401 	struct hvn_rx_ring *rxr;
2402 	struct hvn_tx_ring *txr;
2403 	int idx;
2404 
2405 	idx = chan->ch_subidx;
2406 	if (idx < 0 || idx >= sc->sc_nrxr_inuse) {
2407 		DPRINTF("%s: invalid sub-channel %d\n",
2408 		    device_xname(sc->sc_dev), idx);
2409 		return -1;
2410 	}
2411 
2412 	rxr = &sc->sc_rxr[idx];
2413 	rxr->rxr_chan = chan;
2414 
2415 	if (idx < sc->sc_ntxr_inuse) {
2416 		txr = &sc->sc_txr[idx];
2417 		txr->txr_chan = chan;
2418 	}
2419 
2420 	/* Bind this channel to a proper CPU. */
2421 	vmbus_channel_cpu_set(chan, HVN_RING_IDX2CPU(sc, idx));
2422 
2423 	chan->ch_flags &= ~CHF_BATCHED;
2424 
2425 	/* Associate our interrupt handler with the channel */
2426 	if (vmbus_channel_open(chan,
2427 	    HVN_RING_BUFSIZE - sizeof(struct vmbus_bufring), NULL, 0,
2428 	    hvn_nvs_intr, rxr)) {
2429 		DPRINTF("%s: failed to open channel\n",
2430 		    device_xname(sc->sc_dev));
2431 		return -1;
2432 	}
2433 
2434 	return 0;
2435 }
2436 
2437 static void
2438 hvn_channel_detach(struct hvn_softc *sc, struct vmbus_channel *chan)
2439 {
2440 
2441 	vmbus_channel_close_direct(chan);
2442 }
2443 
2444 static void
2445 hvn_channel_detach_all(struct hvn_softc *sc)
2446 {
2447 	struct vmbus_channel **subchans;
2448 	int i, subchan_cnt = sc->sc_nrxr_inuse - 1;
2449 
2450 	if (subchan_cnt > 0) {
2451 		/* Detach the sub-channels. */
2452 		subchans = vmbus_subchannel_get(sc->sc_prichan, subchan_cnt);
2453 		for (i = 0; i < subchan_cnt; i++)
2454 			hvn_channel_detach(sc, subchans[i]);
2455 		vmbus_subchannel_rel(subchans, subchan_cnt);
2456 	}
2457 
2458 	/*
2459 	 * Detach the primary channel, _after_ all sub-channels
2460 	 * are detached.
2461 	 */
2462 	hvn_channel_detach(sc, sc->sc_prichan);
2463 
2464 	/* Wait for sub-channels to be destroyed, if any. */
2465 	vmbus_subchannel_drain(sc->sc_prichan);
2466 }
2467 
2468 static int
2469 hvn_subchannel_attach(struct hvn_softc *sc)
2470 {
2471 	struct vmbus_channel **subchans;
2472 	int subchan_cnt = sc->sc_nrxr_inuse - 1;
2473 	int i, error = 0;
2474 
2475 	KASSERTMSG(subchan_cnt > 0, "no sub-channels");
2476 
2477 	/* Attach the sub-channels. */
2478 	subchans = vmbus_subchannel_get(sc->sc_prichan, subchan_cnt);
2479 	for (i = 0; i < subchan_cnt; ++i) {
2480 		int error1;
2481 
2482 		error1 = hvn_channel_attach(sc, subchans[i]);
2483 		if (error1) {
2484 			error = error1;
2485 			/* Move on; all channels will be detached later. */
2486 		}
2487 	}
2488 	vmbus_subchannel_rel(subchans, subchan_cnt);
2489 
2490 	if (error) {
2491 		aprint_error_dev(sc->sc_dev,
2492 		    "sub-channels attach failed: %d\n", error);
2493 		return error;
2494 	}
2495 
2496 	aprint_debug_dev(sc->sc_dev, "%d sub-channels attached\n",
2497 	    subchan_cnt);
2498 	return 0;
2499 }
2500 
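/*
 * Negotiate the number of channels with the host.  On entry *nsubch
 * is the number of sub-channels we would like; on return it is the
 * number actually granted (0 when multiple rings are not requested,
 * RSS is unavailable, or allocation fails).  The channel count is
 * clamped to the number of RX rings the host offers via its RSS
 * capabilities.
 */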
2501 static int
2502 hvn_synth_alloc_subchannels(struct hvn_softc *sc, int *nsubch)
2503 {
2504 	struct vmbus_channel **subchans;
2505 	int error, nchan, rxr_cnt;
2506 
2507 	nchan = *nsubch + 1;
2508 	if (nchan < 2) {
2509 		/* Multiple RX/TX rings are not requested. */
2510 		*nsubch = 0;
2511 		return 0;
2512 	}
2513 
2514 	/*
2515 	 * Query RSS capabilities, e.g. the # of RX rings and the # of
2516 	 * indirect table entries.
2517 	 */
2518 	if (hvn_get_rsscaps(sc, &rxr_cnt)) {
2519 		/* No RSS. */
2520 		*nsubch = 0;
2521 		return 0;
2522 	}
2523 
2524 	aprint_debug_dev(sc->sc_dev, "RX rings offered %d, requested %d\n",
2525 	    rxr_cnt, nchan);
2526 
2527 	if (nchan > rxr_cnt)
2528 		nchan = rxr_cnt;
2529 	if (nchan == 1) {
2530 		aprint_debug_dev(sc->sc_dev,
2531 		    "only 1 channel is supported, no vRSS\n");
2532 		*nsubch = 0;
2533 		return 0;
2534 	}
2535 
2536 	*nsubch = nchan - 1;
2537 	error = hvn_nvs_alloc_subchannels(sc, nsubch);
2538 	if (error || *nsubch == 0) {
2539 		/* Failed to allocate sub-channels. */
2540 		*nsubch = 0;
2541 		return 0;
2542 	}
2543 
2544 	/*
2545 	 * Wait for all sub-channels to become ready before moving on.
2546 	 */
2547 	subchans = vmbus_subchannel_get(sc->sc_prichan, *nsubch);
2548 	vmbus_subchannel_rel(subchans, *nsubch);
2549 	return 0;
2550 }
2551 
2552 static int
2553 hvn_synth_attachable(const struct hvn_softc *sc)
2554 {
2555 #if 0
2556 	const struct hvn_rx_ring *rxr;
2557 	int i;
2558 
2559 	for (i = 0; i < sc->sc_nrxr; i++) {
2560 		rxr = &sc->sc_rxr[i];
2561 		if (rxr->rxr_flags)
2562 			return 0;
2563 	}
2564 #endif
2565 	return 1;
2566 }
2567 
2568 /*
2569  * Make sure that the RX filter is zero after the successful
2570  * RNDIS initialization.
2571  *
2572  * NOTE:
2573  * Under certain conditions on certain versions of Hyper-V,
2574  * the RNDIS rxfilter is _not_ zero on the hypervisor side
2575  * after the successful RNDIS initialization, which breaks
2576  * the assumption of any following code (well, it breaks the
2577  * RNDIS API contract actually).  Clear the RNDIS rxfilter
2578  * explicitly, drain packets sneaking through, and drain the
2579  * interrupt taskqueues scheduled due to the stealth packets.
2580  */
2581 static void
2582 hvn_init_fixat(struct hvn_softc *sc, int nchan)
2583 {
2584 
2585 	hvn_disable_rx(sc);
2586 	hvn_drain_rxtx(sc, nchan);
2587 }
2588 
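/*
 * Distribute the TX aggregation limits to all rings in use.  The
 * tunable sc_agg_size/sc_agg_pkts (negative means "no limit") are
 * clamped against what RNDIS advertised and against the chimney
 * section size, since only chimney sends are aggregated.  Aggregation
 * is disabled outright if the resulting size cannot hold at least two
 * minimal packets, if at most one packet may be aggregated, or if the
 * required alignment does not fit the per-ring 'short' field.
 */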
2589 static void
2590 hvn_set_txagg(struct hvn_softc *sc)
2591 {
2592 	struct hvn_tx_ring *txr;
2593 	uint32_t size, pkts;
2594 	int i;
2595 
2596 	/*
2597 	 * Setup aggregation size.
2598 	 */
2599 	if (sc->sc_agg_size < 0)
2600 		size = UINT32_MAX;
2601 	else
2602 		size = sc->sc_agg_size;
2603 
2604 	if (size > sc->sc_rndis_agg_size)
2605 		size = sc->sc_rndis_agg_size;
2606 
2607 	/* NOTE: We only aggregate packets using chimney sending buffers. */
2608 	if (size > (uint32_t)sc->sc_chim_szmax)
2609 		size = sc->sc_chim_szmax;
2610 
2611 	if (size <= 2 * HVN_PKTSIZE_MIN(sc->sc_rndis_agg_align)) {
2612 		/* Disable */
2613 		size = 0;
2614 		pkts = 0;
2615 		goto done;
2616 	}
2617 
2618 	/* NOTE: Type of the per TX ring setting is 'int'. */
2619 	if (size > INT_MAX)
2620 		size = INT_MAX;
2621 
2622 	/*
2623 	 * Setup aggregation packet count.
2624 	 */
2625 	if (sc->sc_agg_pkts < 0)
2626 		pkts = UINT32_MAX;
2627 	else
2628 		pkts = sc->sc_agg_pkts;
2629 
2630 	if (pkts > sc->sc_rndis_agg_pkts)
2631 		pkts = sc->sc_rndis_agg_pkts;
2632 
2633 	if (pkts <= 1) {
2634 		/* Disable */
2635 		size = 0;
2636 		pkts = 0;
2637 		goto done;
2638 	}
2639 
2640 	/* NOTE: Type of the per TX ring setting is 'short'. */
2641 	if (pkts > SHRT_MAX)
2642 		pkts = SHRT_MAX;
2643 
2644 done:
2645 	/* NOTE: Type of the per TX ring setting is 'short'. */
2646 	if (sc->sc_rndis_agg_align > SHRT_MAX) {
2647 		/* Disable */
2648 		size = 0;
2649 		pkts = 0;
2650 	}
2651 
2652 	aprint_verbose_dev(sc->sc_dev,
2653 	    "TX aggregate size %u, pkts %u, align %u\n",
2654 	    size, pkts, sc->sc_rndis_agg_align);
2655 
2656 	for (i = 0; i < sc->sc_ntxr_inuse; ++i) {
2657 		txr = &sc->sc_txr[i];
2658 
2659 		mutex_enter(&txr->txr_lock);
2660 		txr->txr_agg_szmax = size;
2661 		txr->txr_agg_pktmax = pkts;
2662 		txr->txr_agg_align = sc->sc_rndis_agg_align;
2663 		mutex_exit(&txr->txr_lock);
2664 	}
2665 }
2666 
2667 static int
2668 hvn_synth_attach(struct hvn_softc *sc, int mtu)
2669 {
2670 	uint8_t rss_key[RSS_KEYSIZE];
2671 	uint32_t old_caps;
2672 	int nchan = 1, nsubch;
2673 	int i, error;
2674 
2675 	if (!hvn_synth_attachable(sc))
2676 		return ENXIO;
2677 
2678 	/* Save capabilities for later verification. */
2679 	old_caps = sc->sc_caps;
2680 	sc->sc_caps = 0;
2681 
2682 	/* Clear RSS stuffs. */
2683 	sc->sc_rss_ind_size = 0;
2684 	sc->sc_rss_hash = 0;
2685 	sc->sc_rss_hcap = 0;
2686 
2687 	/*
2688 	 * Attach the primary channel _before_ attaching NVS and RNDIS.
2689 	 */
2690 	error = hvn_channel_attach(sc, sc->sc_prichan);
2691 	if (error) {
2692 		aprint_error_dev(sc->sc_dev,
2693 		    "failed to attach primary channel\n");
2694 		goto failed;
2695 	}
2696 
2697 	/*
2698 	 * Attach NVS.
2699 	 */
2700 	error = hvn_nvs_attach(sc, mtu);
2701 	if (error) {
2702 		aprint_error_dev(sc->sc_dev, "failed to init NVSP\n");
2703 		goto detach_channel;
2704 	}
2705 
2706 	/*
2707 	 * Attach RNDIS _after_ NVS is attached.
2708 	 */
2709 	error = hvn_rndis_attach(sc, mtu);
2710 	if (error) {
2711 		aprint_error_dev(sc->sc_dev, "failed to init RNDIS\n");
2712 		goto detach_nvs;
2713 	}
2714 
2715 	error = hvn_set_capabilities(sc, mtu);
2716 	if (error) {
2717 		aprint_error_dev(sc->sc_dev, "failed to setup offloading\n");
2718 		goto detach_rndis;
2719 	}
2720 
2721 	if ((sc->sc_flags & HVN_SCF_ATTACHED) && old_caps != sc->sc_caps) {
2722 		device_printf(sc->sc_dev, "caps mismatch "
2723 		    "old 0x%08x, new 0x%08x\n", old_caps, sc->sc_caps);
2724 		error = ENXIO;
2725 		goto detach_rndis;
2726 	}
2727 
2728 	/*
2729 	 * Allocate sub-channels for multi-TX/RX rings.
2730 	 *
2731 	 * NOTE:
2732 	 * The # of RX rings that can be used is equivalent to the # of
2733 	 * channels to be requested.
2734 	 */
2735 	nsubch = sc->sc_nrxr - 1;
2736 	error = hvn_synth_alloc_subchannels(sc, &nsubch);
2737 	if (error) {
2738 		aprint_error_dev(sc->sc_dev,
2739 		    "failed to allocate sub channels\n");
2740 		goto detach_synth;
2741 	}
2742 
2743 	/*
2744 	 * Set the # of TX/RX rings that could be used according to
2745 	 * the # of channels that NVS offered.
2746 	 */
2747 	nchan = nsubch + 1;
2748 	hvn_set_ring_inuse(sc, nchan);
2749 
2750 	if (nchan > 1) {
2751 		/*
2752 		 * Attach the sub-channels.
2753 		 *
2754 		 * NOTE: hvn_set_ring_inuse() _must_ have been called.
2755 		 */
2756 		error = hvn_subchannel_attach(sc);
2757 		if (error) {
2758 			aprint_error_dev(sc->sc_dev,
2759 			    "failed to attach sub channels\n");
2760 			goto detach_synth;
2761 		}
2762 
2763 		/*
2764 		 * Configure RSS key and indirect table _after_ all sub-channels
2765 		 * are attached.
2766 		 */
2767 		if (!(sc->sc_flags & HVN_SCF_HAS_RSSKEY)) {
2768 			/* Set the default RSS key. */
2769 			CTASSERT(sizeof(sc->sc_rss.rss_key) == sizeof(rss_key));
2770 			rss_getkey(rss_key);
2771 			memcpy(&sc->sc_rss.rss_key, rss_key,
2772 			    sizeof(sc->sc_rss.rss_key));
2773 			sc->sc_flags |= HVN_SCF_HAS_RSSKEY;
2774 		}
2775 
2776 		if (!(sc->sc_flags & HVN_SCF_HAS_RSSIND)) {
2777 			/* Setup RSS indirect table in round-robin fashion. */
2778 			for (i = 0; i < NDIS_HASH_INDCNT; i++) {
2779 				sc->sc_rss.rss_ind[i] = i % nchan;
2780 			}
2781 			sc->sc_flags |= HVN_SCF_HAS_RSSIND;
2782 		} else {
2783 			/*
2784 			 * # of usable channels may be changed, so we have to
2785 			 * make sure that all entries in RSS indirect table
2786 			 * are valid.
2787 			 *
2788 			 * NOTE: hvn_set_ring_inuse() _must_ have been called.
2789 			 */
2790 			hvn_fixup_rss_ind(sc);
2791 		}
2792 
2793 		sc->sc_rss_hash = sc->sc_rss_hcap;
2794 		error = hvn_set_rss(sc, NDIS_RSS_FLAG_NONE);
2795 		if (error) {
2796 			aprint_error_dev(sc->sc_dev, "failed to setup RSS\n");
2797 			goto detach_synth;
2798 		}
2799 	}
2800 
2801 	/*
2802 	 * Fixup transmission aggregation setup.
2803 	 */
2804 	hvn_set_txagg(sc);
2805 	hvn_init_fixat(sc, nchan);
2806 	return 0;
2807 
2808 detach_synth:
2809 	hvn_init_fixat(sc, nchan);
2810 	hvn_synth_detach(sc);
2811 	return error;
2812 
2813 detach_rndis:
2814 	hvn_init_fixat(sc, nchan);
2815 	hvn_rndis_detach(sc);
2816 detach_nvs:
2817 	hvn_nvs_detach(sc);
2818 detach_channel:
2819 	hvn_channel_detach(sc, sc->sc_prichan);
2820 failed:
2821 	/* Restore old capabilities. */
2822 	sc->sc_caps = old_caps;
2823 	return error;
2824 }
2825 
2826 static void
2827 hvn_synth_detach(struct hvn_softc *sc)
2828 {
2829 
2830 	/* Detach the RNDIS first. */
2831 	hvn_rndis_detach(sc);
2832 
2833 	/* Detach NVS. */
2834 	hvn_nvs_detach(sc);
2835 
2836 	/* Detach all of the channels. */
2837 	hvn_channel_detach_all(sc);
2838 
2839 	if (sc->sc_prichan->ch_sc->sc_proto >= VMBUS_VERSION_WIN10 &&
2840 	    sc->sc_rx_hndl) {
2841 		/*
2842 		 * Host is post-Win2016, disconnect RXBUF from primary channel
2843 		 * here.
2844 		 */
2845 		vmbus_handle_free(sc->sc_prichan, sc->sc_rx_hndl);
2846 		sc->sc_rx_hndl = 0;
2847 	}
2848 
2849 	if (sc->sc_prichan->ch_sc->sc_proto >= VMBUS_VERSION_WIN10 &&
2850 	    sc->sc_chim_hndl) {
2851 		/*
2852 		 * Host is post-Win2016, disconnect chimney sending buffer
2853 		 * from primary channel here.
2854 		 */
2855 		vmbus_handle_free(sc->sc_prichan, sc->sc_chim_hndl);
2856 		sc->sc_chim_hndl = 0;
2857 	}
2858 }
2859 
2860 static void
2861 hvn_set_ring_inuse(struct hvn_softc *sc, int ring_cnt)
2862 {
2863 
2864 	if (sc->sc_ntxr > ring_cnt)
2865 		sc->sc_ntxr_inuse = ring_cnt;
2866 	else
2867 		sc->sc_ntxr_inuse = sc->sc_ntxr;
2868 	sc->sc_nrxr_inuse = ring_cnt;
2869 }
2870 
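/*
 * Wait until both bufrings of the given channel are empty, polling
 * hvn_nvs_intr1() to consume pending packets ourselves, then wait for
 * the matching RX ring to finish any deferred softint/workqueue
 * processing.  The TX bufring check is skipped once the primary
 * channel has been revoked, since the hypervisor will no longer
 * drain it.
 */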
2871 static void
2872 hvn_channel_drain(struct hvn_softc *sc, struct vmbus_channel *chan)
2873 {
2874 	struct hvn_rx_ring *rxr;
2875 	int i, s;
2876 
2877 	for (rxr = NULL, i = 0; i < sc->sc_nrxr_inuse; i++) {
2878 		rxr = &sc->sc_rxr[i];
2879 		if (rxr->rxr_chan == chan)
2880 			break;
2881 	}
2882 	KASSERT(i < sc->sc_nrxr_inuse);
2883 
2884 	/*
2885 	 * NOTE:
2886 	 * The TX bufring will not be drained by the hypervisor,
2887 	 * if the primary channel is revoked.
2888 	 */
2889 	while (!vmbus_channel_rx_empty(chan) ||
2890 	    (!vmbus_channel_is_revoked(sc->sc_prichan) &&
2891 	     !vmbus_channel_tx_empty(chan))) {
2892 		DELAY(20);
2893 		s = splnet();
2894 		hvn_nvs_intr1(rxr, sc->sc_tx_process_limit,
2895 		    sc->sc_rx_process_limit);
2896 		splx(s);
2897 	}
2898 
2899 	mutex_enter(&rxr->rxr_onwork_lock);
2900 	while (rxr->rxr_onlist || rxr->rxr_onproc)
2901 		cv_wait(&rxr->rxr_onwork_cv, &rxr->rxr_onwork_lock);
2902 	mutex_exit(&rxr->rxr_onwork_lock);
2903 }
2904 
2905 static void
2906 hvn_disable_rx(struct hvn_softc *sc)
2907 {
2908 
2909 	/*
2910 	 * Disable RX by clearing RX filter forcefully.
2911 	 */
2912 	(void)hvn_rndis_close(sc);	/* ignore error */
2913 
2914 	/*
2915 	 * Give RNDIS enough time to flush all pending data packets.
2916 	 */
2917 	DELAY(200);
2918 }
2919 
2920 static void
2921 hvn_drain_rxtx(struct hvn_softc *sc, int nchan)
2922 {
2923 	struct vmbus_channel **subchans = NULL;
2924 	int i, nsubch;
2925 
2926 	/*
2927 	 * Drain RX/TX bufrings and interrupts.
2928 	 */
2929 	nsubch = nchan - 1;
2930 	if (nsubch > 0)
2931 		subchans = vmbus_subchannel_get(sc->sc_prichan, nsubch);
2932 
2933 	if (subchans != NULL) {
2934 		for (i = 0; i < nsubch; ++i)
2935 			hvn_channel_drain(sc, subchans[i]);
2936 	}
2937 	hvn_channel_drain(sc, sc->sc_prichan);
2938 
2939 	if (subchans != NULL)
2940 		vmbus_subchannel_rel(subchans, nsubch);
2941 }
2942 
2943 static void
2944 hvn_suspend_data(struct hvn_softc *sc)
2945 {
2946 	struct hvn_tx_ring *txr;
2947 	int i, s;
2948 
2949 	/*
2950 	 * Suspend TX.
2951 	 */
2952 	for (i = 0; i < sc->sc_ntxr_inuse; i++) {
2953 		txr = &sc->sc_txr[i];
2954 
2955 		mutex_enter(&txr->txr_lock);
2956 		txr->txr_suspended = 1;
2957 		mutex_exit(&txr->txr_lock);
2958 		/* No one is able to send more packets now. */
2959 
2960 		/*
2961 		 * Wait for all pending sends to finish.
2962 		 *
2963 		 * NOTE:
2964 		 * We will _not_ receive all pending send-done, if the
2965 		 * primary channel is revoked.
2966 		 */
2967 		while (hvn_tx_ring_pending(txr) &&
2968 		    !vmbus_channel_is_revoked(sc->sc_prichan)) {
2969 			DELAY(20);
2970 			s = splnet();
2971 			hvn_nvs_intr1(txr->txr_rxr, sc->sc_tx_process_limit,
2972 			    sc->sc_rx_process_limit);
2973 			splx(s);
2974 		}
2975 	}
2976 
2977 	/*
2978 	 * Disable RX.
2979 	 */
2980 	hvn_disable_rx(sc);
2981 
2982 	/*
2983 	 * Drain RX/TX.
2984 	 */
2985 	hvn_drain_rxtx(sc, sc->sc_nrxr_inuse);
2986 }
2987 
2988 static void
2989 hvn_suspend_mgmt(struct hvn_softc *sc)
2990 {
2991 
2992 	sc->sc_link_suspend = true;
2993 	callout_halt(&sc->sc_link_tmout, NULL);
2994 
2995 	/* Drain link state task */
2996 	mutex_enter(&sc->sc_link_lock);
2997 	for (;;) {
2998 		if (!sc->sc_link_onproc)
2999 			break;
3000 		mutex_exit(&sc->sc_link_lock);
3001 		DELAY(20);
3002 		mutex_enter(&sc->sc_link_lock);
3003 	}
3004 	mutex_exit(&sc->sc_link_lock);
3005 }
3006 
3007 static void
3008 hvn_suspend(struct hvn_softc *sc)
3009 {
3010 	struct ifnet *ifp = SC2IFP(sc);
3011 
3012 	if (ifp->if_flags & IFF_RUNNING)
3013 		hvn_suspend_data(sc);
3014 	hvn_suspend_mgmt(sc);
3015 }
3016 
3017 static void
3018 hvn_resume_tx(struct hvn_softc *sc, int ring_cnt)
3019 {
3020 	struct hvn_tx_ring *txr;
3021 	int i;
3022 
3023 	for (i = 0; i < ring_cnt; i++) {
3024 		txr = &sc->sc_txr[i];
3025 		mutex_enter(&txr->txr_lock);
3026 		txr->txr_suspended = 0;
3027 		mutex_exit(&txr->txr_lock);
3028 	}
3029 }
3030 
3031 static void
3032 hvn_resume_data(struct hvn_softc *sc)
3033 {
3034 	struct ifnet *ifp = SC2IFP(sc);
3035 	struct hvn_tx_ring *txr;
3036 	int i;
3037 
3038 	/*
3039 	 * Re-enable RX.
3040 	 */
3041 	hvn_rndis_open(sc);
3042 
3043 	/*
3044 	 * Make sure to clear suspend status on "all" TX rings,
3045 	 * since sc_ntxr_inuse can be changed after hvn_suspend_data().
3046 	 */
3047 	hvn_resume_tx(sc, sc->sc_ntxr);
3048 
3049 	/*
3050 	 * Flush unused mbufs, since sc_ntxr_inuse may have been reduced.
3051 	 */
3052 	for (i = sc->sc_ntxr_inuse; i < sc->sc_ntxr; i++)
3053 		hvn_tx_ring_qflush(sc, &sc->sc_txr[i]);
3054 
3055 	/*
3056 	 * Kick start TX.
3057 	 */
3058 	for (i = 0; i < sc->sc_ntxr_inuse; i++) {
3059 		txr = &sc->sc_txr[i];
3060 		mutex_enter(&txr->txr_lock);
3061 		txr->txr_oactive = 0;
3062 
3063 		/* ALTQ */
3064 		if (txr->txr_id == 0)
3065 			if_schedule_deferred_start(ifp);
3066 		softint_schedule(txr->txr_si);
3067 		mutex_exit(&txr->txr_lock);
3068 	}
3069 }
3070 
3071 static void
3072 hvn_resume_mgmt(struct hvn_softc *sc)
3073 {
3074 
3075 	sc->sc_link_suspend = false;
3076 	hvn_link_event(sc, HVN_LINK_EV_RESUME_NETWORK);
3077 }
3078 
3079 static void
3080 hvn_resume(struct hvn_softc *sc)
3081 {
3082 	struct ifnet *ifp = SC2IFP(sc);
3083 
3084 	if (ifp->if_flags & IFF_RUNNING)
3085 		hvn_resume_data(sc);
3086 	hvn_resume_mgmt(sc);
3087 }
3088 
3089 static int
3090 hvn_nvs_init(struct hvn_softc *sc)
3091 {
3092 
3093 	mutex_init(&sc->sc_nvsrsp_lock, MUTEX_DEFAULT, IPL_NET);
3094 	cv_init(&sc->sc_nvsrsp_cv, "nvsrspcv");
3095 
3096 	return 0;
3097 }
3098 
3099 static void
3100 hvn_nvs_destroy(struct hvn_softc *sc)
3101 {
3102 
3103 	mutex_destroy(&sc->sc_nvsrsp_lock);
3104 	cv_destroy(&sc->sc_nvsrsp_cv);
3105 }
3106 
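/*
 * Propose a single NVS protocol version to the host:
 *
 *	cmd.nvs_type    = HVN_NVS_TYPE_INIT;
 *	cmd.nvs_ver_min = cmd.nvs_ver_max = proto;
 *
 * The host accepts by returning HVN_NVS_STATUS_OK in the INIT
 * response; hvn_nvs_attach() walks its version list from newest to
 * oldest until one such proposal succeeds.
 */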
3107 static int
3108 hvn_nvs_doinit(struct hvn_softc *sc, uint32_t proto)
3109 {
3110 	struct hvn_nvs_init cmd;
3111 	struct hvn_nvs_init_resp *rsp;
3112 	uint64_t tid;
3113 	int error;
3114 
3115 	memset(&cmd, 0, sizeof(cmd));
3116 	cmd.nvs_type = HVN_NVS_TYPE_INIT;
3117 	cmd.nvs_ver_min = cmd.nvs_ver_max = proto;
3118 
3119 	tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3120 	mutex_enter(&sc->sc_nvsrsp_lock);
3121 	error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
3122 	if (error == 0) {
3123 		rsp = (struct hvn_nvs_init_resp *)&sc->sc_nvsrsp;
3124 		if (rsp->nvs_status != HVN_NVS_STATUS_OK)
3125 			error = EINVAL;
3126 	}
3127 	mutex_exit(&sc->sc_nvsrsp_lock);
3128 
3129 	return error;
3130 }
3131 
3132 static int
3133 hvn_nvs_conf_ndis(struct hvn_softc *sc, int mtu)
3134 {
3135 	struct hvn_nvs_ndis_conf cmd;
3136 	uint64_t tid;
3137 	int error;
3138 
3139 	memset(&cmd, 0, sizeof(cmd));
3140 	cmd.nvs_type = HVN_NVS_TYPE_NDIS_CONF;
3141 	cmd.nvs_mtu = mtu + ETHER_HDR_LEN;
3142 	cmd.nvs_caps = HVN_NVS_NDIS_CONF_VLAN;
3143 
3144 	tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3145 	mutex_enter(&sc->sc_nvsrsp_lock);
3146 	/* NOTE: No response. */
3147 	error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
3148 	mutex_exit(&sc->sc_nvsrsp_lock);
3149 
3150 	if (error == 0)
3151 		sc->sc_caps |= HVN_CAPS_MTU | HVN_CAPS_VLAN;
3152 	return error;
3153 }
3154 
3155 static int
3156 hvn_nvs_init_ndis(struct hvn_softc *sc)
3157 {
3158 	struct hvn_nvs_ndis_init cmd;
3159 	uint64_t tid;
3160 	int error;
3161 
3162 	memset(&cmd, 0, sizeof(cmd));
3163 	cmd.nvs_type = HVN_NVS_TYPE_NDIS_INIT;
3164 	cmd.nvs_ndis_major = (sc->sc_ndisver & 0xffff0000) >> 16;
3165 	cmd.nvs_ndis_minor = sc->sc_ndisver & 0x0000ffff;
3166 
3167 	tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3168 	mutex_enter(&sc->sc_nvsrsp_lock);
3169 	/* NOTE: No response. */
3170 	error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0);
3171 	mutex_exit(&sc->sc_nvsrsp_lock);
3172 
3173 	return error;
3174 }
3175 
3176 static int
3177 hvn_nvs_attach(struct hvn_softc *sc, int mtu)
3178 {
3179 	static const uint32_t protos[] = {
3180 		HVN_NVS_PROTO_VERSION_5,
3181 		HVN_NVS_PROTO_VERSION_4,
3182 		HVN_NVS_PROTO_VERSION_2,
3183 		HVN_NVS_PROTO_VERSION_1
3184 	};
3185 	int i;
3186 
3187 	if (hyperv_ver_major >= 10)
3188 		sc->sc_caps |= HVN_CAPS_UDPHASH;
3189 
3190 	/*
3191 	 * Initialize NVS.
3192 	 */
3193 	if (sc->sc_flags & HVN_SCF_ATTACHED) {
3194 		/*
3195 		 * NVS version and NDIS version MUST NOT be changed.
3196 		 */
3197 		DPRINTF("%s: reinit NVS version %#x, NDIS version %u.%u\n",
3198 		    device_xname(sc->sc_dev), sc->sc_proto,
3199 		    (sc->sc_ndisver >> 16), sc->sc_ndisver & 0xffff);
3200 
3201 		if (hvn_nvs_doinit(sc, sc->sc_proto)) {
3202 			DPRINTF("%s: failed to reinit NVSP version %#x\n",
3203 			    device_xname(sc->sc_dev), sc->sc_proto);
3204 			return -1;
3205 		}
3206 	} else {
3207 		/*
3208 		 * Find the supported NVS version and set NDIS version
3209 		 * accordingly.
3210 		 */
3211 		for (i = 0; i < __arraycount(protos); i++) {
3212 			if (hvn_nvs_doinit(sc, protos[i]) == 0)
3213 				break;
3214 		}
3215 		if (i == __arraycount(protos)) {
3216 			DPRINTF("%s: failed to negotiate NVSP version\n",
3217 			    device_xname(sc->sc_dev));
3218 			return -1;
3219 		}
3220 
3221 		sc->sc_proto = protos[i];
3222 		if (sc->sc_proto <= HVN_NVS_PROTO_VERSION_4)
3223 			sc->sc_ndisver = NDIS_VERSION_6_1;
3224 		else
3225 			sc->sc_ndisver = NDIS_VERSION_6_30;
3226 
3227 		DPRINTF("%s: NVS version %#x, NDIS version %u.%u\n",
3228 		    device_xname(sc->sc_dev), sc->sc_proto,
3229 		    (sc->sc_ndisver >> 16), sc->sc_ndisver & 0xffff);
3230 	}
3231 
3232 	if (sc->sc_proto >= HVN_NVS_PROTO_VERSION_5)
3233 		sc->sc_caps |= HVN_CAPS_HASHVAL;
3234 
3235 	if (sc->sc_proto >= HVN_NVS_PROTO_VERSION_2) {
3236 		/*
3237 		 * Configure NDIS before initializing it.
3238 		 */
3239 		if (hvn_nvs_conf_ndis(sc, mtu))
3240 			return -1;
3241 	}
3242 
3243 	/*
3244 	 * Initialize NDIS.
3245 	 */
3246 	if (hvn_nvs_init_ndis(sc))
3247 		return -1;
3248 
3249 	/*
3250 	 * Connect RXBUF.
3251 	 */
3252 	if (hvn_nvs_connect_rxbuf(sc))
3253 		return -1;
3254 
3255 	/*
3256 	 * Connect chimney sending buffer.
3257 	 */
3258 	if (hvn_nvs_connect_chim(sc))
3259 		return -1;
3260 
3261 	return 0;
3262 }
3263 
3264 static int
3265 hvn_nvs_connect_rxbuf(struct hvn_softc *sc)
3266 {
3267 	struct hvn_nvs_rxbuf_conn cmd;
3268 	struct hvn_nvs_rxbuf_conn_resp *rsp;
3269 	uint64_t tid;
3270 
3271 	if (vmbus_handle_alloc(sc->sc_prichan, &sc->sc_rx_dma, sc->sc_rx_size,
3272 	    &sc->sc_rx_hndl)) {
3273 		DPRINTF("%s: failed to obtain a PA handle\n",
3274 		    device_xname(sc->sc_dev));
3275 		return -1;
3276 	}
3277 
3278 	memset(&cmd, 0, sizeof(cmd));
3279 	cmd.nvs_type = HVN_NVS_TYPE_RXBUF_CONN;
3280 	cmd.nvs_gpadl = sc->sc_rx_hndl;
3281 	cmd.nvs_sig = HVN_NVS_RXBUF_SIG;
3282 
3283 	tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3284 	mutex_enter(&sc->sc_nvsrsp_lock);
3285 	if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0))
3286 		goto errout;
3287 
3288 	rsp = (struct hvn_nvs_rxbuf_conn_resp *)&sc->sc_nvsrsp;
3289 	if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
3290 		DPRINTF("%s: failed to set up the Rx ring\n",
3291 		    device_xname(sc->sc_dev));
3292 		goto errout;
3293 	}
3294 
3295 	SET(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED);
3296 
3297 	if (rsp->nvs_nsect > 1) {
3298 		DPRINTF("%s: invalid number of Rx ring sections: %u\n",
3299 		    device_xname(sc->sc_dev), rsp->nvs_nsect);
3300 		goto errout;
3301 	}
3302 	mutex_exit(&sc->sc_nvsrsp_lock);
3303 
3304 	return 0;
3305 
3306  errout:
3307 	mutex_exit(&sc->sc_nvsrsp_lock);
3308 	hvn_nvs_disconnect_rxbuf(sc);
3309 	return -1;
3310 }
3311 
3312 static int
3313 hvn_nvs_disconnect_rxbuf(struct hvn_softc *sc)
3314 {
3315 	struct hvn_nvs_rxbuf_disconn cmd;
3316 	uint64_t tid;
3317 	int s, error;
3318 
3319 	if (ISSET(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED)) {
3320 		memset(&cmd, 0, sizeof(cmd));
3321 		cmd.nvs_type = HVN_NVS_TYPE_RXBUF_DISCONN;
3322 		cmd.nvs_sig = HVN_NVS_RXBUF_SIG;
3323 
3324 		tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3325 		mutex_enter(&sc->sc_nvsrsp_lock);
3326 		error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid,
3327 		    HVN_NVS_CMD_NORESP);
3328 		if (error) {
3329 			device_printf(sc->sc_dev,
3330 			    "failed to send rxbuf disconn: %d\n", error);
3331 		}
3332 		CLR(sc->sc_flags, HVN_SCF_RXBUF_CONNECTED);
3333 		mutex_exit(&sc->sc_nvsrsp_lock);
3334 
3335 		/*
3336 		 * Wait for the hypervisor to receive this NVS request.
3337 		 *
3338 		 * NOTE:
3339 		 * The TX bufring will not be drained by the hypervisor,
3340 		 * if the primary channel is revoked.
3341 		 */
3342 		while (!vmbus_channel_tx_empty(sc->sc_prichan) &&
3343 		    !vmbus_channel_is_revoked(sc->sc_prichan)) {
3344 			DELAY(20);
3345 			s = splnet();
3346 			hvn_nvs_intr1(&sc->sc_rxr[0], sc->sc_tx_process_limit,
3347 			    sc->sc_rx_process_limit);
3348 			splx(s);
3349 		}
3350 		/*
3351 		 * Linger long enough for NVS to disconnect RXBUF.
3352 		 */
3353 		DELAY(200);
3354 	}
3355 
3356 	if (sc->sc_prichan->ch_sc->sc_proto < VMBUS_VERSION_WIN10 &&
3357 	    sc->sc_rx_hndl) {
3358 		/*
3359 		 * Disconnect RXBUF from primary channel.
3360 		 */
3361 		vmbus_handle_free(sc->sc_prichan, sc->sc_rx_hndl);
3362 		sc->sc_rx_hndl = 0;
3363 	}
3364 
3365 	return 0;
3366 }
3367 
3368 static int
3369 hvn_nvs_connect_chim(struct hvn_softc *sc)
3370 {
3371 	struct hvn_nvs_chim_conn cmd;
3372 	const struct hvn_nvs_chim_conn_resp *rsp;
3373 	uint64_t tid;
3374 
3375 	mutex_init(&sc->sc_chim_bmap_lock, MUTEX_DEFAULT, IPL_NET);
3376 
3377 	/*
3378 	 * Connect chimney sending buffer GPADL to the primary channel.
3379 	 *
3380 	 * NOTE:
3381 	 * Only primary channel has chimney sending buffer connected to it.
3382 	 * Sub-channels just share this chimney sending buffer.
3383 	 */
3384 	if (vmbus_handle_alloc(sc->sc_prichan, &sc->sc_chim_dma, HVN_CHIM_SIZE,
3385 	    &sc->sc_chim_hndl)) {
3386 		DPRINTF("%s: failed to obtain a PA handle for chimney\n",
3387 		    device_xname(sc->sc_dev));
3388 		return -1;
3389 	}
3390 
3391 	memset(&cmd, 0, sizeof(cmd));
3392 	cmd.nvs_type = HVN_NVS_TYPE_CHIM_CONN;
3393 	cmd.nvs_gpadl = sc->sc_chim_hndl;
3394 	cmd.nvs_sig = HVN_NVS_CHIM_SIG;
3395 
3396 	tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3397 	mutex_enter(&sc->sc_nvsrsp_lock);
3398 	if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0))
3399 		goto errout;
3400 
3401 	rsp = (struct hvn_nvs_chim_conn_resp *)&sc->sc_nvsrsp;
3402 	if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
3403 		DPRINTF("%s: failed to set up chimney sending buffer\n",
3404 		    device_xname(sc->sc_dev));
3405 		goto errout;
3406 	}
3407 
3408 	if (rsp->nvs_sectsz == 0 ||
3409 	    (rsp->nvs_sectsz % sizeof(uint32_t)) != 0) {
3410 		/*
3411 		 * Can't use chimney sending buffer; done!
3412 		 */
3413 		if (rsp->nvs_sectsz == 0) {
3414 			device_printf(sc->sc_dev,
3415 			    "zero chimney sending buffer section size\n");
3416 		} else {
3417 			device_printf(sc->sc_dev,
3418 			    "misaligned chimney sending buffers,"
3419 			    " section size: %d\n", rsp->nvs_sectsz);
3420 		}
3421 		sc->sc_chim_szmax = 0;
3422 		sc->sc_chim_cnt = 0;
3423 	} else {
3424 		sc->sc_chim_szmax = rsp->nvs_sectsz;
3425 		sc->sc_chim_cnt = HVN_CHIM_SIZE / sc->sc_chim_szmax;
3426 	}
3427 
3428 	if (sc->sc_chim_szmax > 0) {
3429 		if ((HVN_CHIM_SIZE % sc->sc_chim_szmax) != 0) {
3430 			device_printf(sc->sc_dev,
3431 			    "chimney sending sections are not properly "
3432 			    "aligned\n");
3433 		}
3434 		if ((sc->sc_chim_cnt % LONG_BIT) != 0) {
3435 			device_printf(sc->sc_dev,
3436 			    "discarding %d chimney sending sections\n",
3437 			    sc->sc_chim_cnt % LONG_BIT);
3438 		}
3439 
3440 		sc->sc_chim_bmap_cnt = sc->sc_chim_cnt / LONG_BIT;
3441 		sc->sc_chim_bmap = kmem_zalloc(sc->sc_chim_bmap_cnt *
3442 		    sizeof(u_long), KM_SLEEP);
3443 	}
3444 
3445 	/* Done! */
3446 	SET(sc->sc_flags, HVN_SCF_CHIM_CONNECTED);
3447 
3448 	aprint_verbose_dev(sc->sc_dev, "chimney sending buffer %d/%d\n",
3449 	    sc->sc_chim_szmax, sc->sc_chim_cnt);
3450 
3451 	mutex_exit(&sc->sc_nvsrsp_lock);
3452 
3453 	return 0;
3454 
3455 errout:
3456 	mutex_exit(&sc->sc_nvsrsp_lock);
3457 	hvn_nvs_disconnect_chim(sc);
3458 	return -1;
3459 }
3460 
3461 static int
3462 hvn_nvs_disconnect_chim(struct hvn_softc *sc)
3463 {
3464 	struct hvn_nvs_chim_disconn cmd;
3465 	uint64_t tid;
3466 	int s, error;
3467 
3468 	if (ISSET(sc->sc_flags, HVN_SCF_CHIM_CONNECTED)) {
3469 		memset(&cmd, 0, sizeof(cmd));
3470 		cmd.nvs_type = HVN_NVS_TYPE_CHIM_DISCONN;
3471 		cmd.nvs_sig = HVN_NVS_CHIM_SIG;
3472 
3473 		tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3474 		mutex_enter(&sc->sc_nvsrsp_lock);
3475 		error = hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid,
3476 		    HVN_NVS_CMD_NORESP);
3477 		if (error) {
3478 			device_printf(sc->sc_dev,
3479 			    "failed to send chim disconn: %d\n", error);
3480 		}
3481 		CLR(sc->sc_flags, HVN_SCF_CHIM_CONNECTED);
3482 		mutex_exit(&sc->sc_nvsrsp_lock);
3483 
3484 		/*
3485 		 * Wait for the hypervisor to receive this NVS request.
3486 		 *
3487 		 * NOTE:
3488 		 * The TX bufring will not be drained by the hypervisor,
3489 		 * if the primary channel is revoked.
3490 		 */
3491 		while (!vmbus_channel_tx_empty(sc->sc_prichan) &&
3492 		    !vmbus_channel_is_revoked(sc->sc_prichan)) {
3493 			DELAY(20);
3494 			s = splnet();
3495 			hvn_nvs_intr1(&sc->sc_rxr[0], sc->sc_tx_process_limit,
3496 			    sc->sc_rx_process_limit);
3497 			splx(s);
3498 		}
3499 		/*
3500 		 * Linger long enough for NVS to disconnect chimney
3501 		 * sending buffer.
3502 		 */
3503 		DELAY(200);
3504 	}
3505 
3506 	if (sc->sc_prichan->ch_sc->sc_proto < VMBUS_VERSION_WIN10 &&
3507 	    sc->sc_chim_hndl) {
3508 		/*
3509 		 * Disconnect chimney sending buffer from primary channel.
3510 		 */
3511 		vmbus_handle_free(sc->sc_prichan, sc->sc_chim_hndl);
3512 		sc->sc_chim_hndl = 0;
3513 	}
3514 
3515 	if (sc->sc_chim_bmap != NULL) {
3516 		kmem_free(sc->sc_chim_bmap, sc->sc_chim_cnt / LONG_BIT);
3517 		sc->sc_chim_bmap = NULL;
3518 		sc->sc_chim_bmap_cnt = 0;
3519 	}
3520 
3521 	mutex_destroy(&sc->sc_chim_bmap_lock);
3522 
3523 	return 0;
3524 }
3525 
3526 #define HVN_HANDLE_RING_DOTX	__BIT(0)
3527 
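/*
 * Ring processing loop, shared by the interrupt, softint and
 * workqueue paths.  Each NVS packet is dispatched on its channel
 * packet type: completions carry either NVS command responses (copied
 * to sc_nvsrsp for hvn_nvs_cmd()) or RNDIS send acknowledgements
 * (completing tx descriptors), RXBUF packets carry inbound RNDIS
 * data, and inband packets carry notifications.  txlimit/rxlimit
 * bound the work done per call; a limit of 0 means no limit.  The
 * HVN_HANDLE_RING_DOTX bit in the return value tells the caller that
 * TX completions were processed and transmission may be restarted.
 */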
3528 static int
3529 hvn_handle_ring(struct hvn_rx_ring *rxr, int txlimit, int rxlimit)
3530 {
3531 	struct hvn_softc *sc = rxr->rxr_softc;
3532 	struct vmbus_chanpkt_hdr *cph;
3533 	const struct hvn_nvs_hdr *nvs;
3534 	uint64_t rid;
3535 	uint32_t rlen;
3536 	int n, tx = 0, rx = 0;
3537 	int result = 0;
3538 	int rv;
3539 
3540 	mutex_enter(&rxr->rxr_lock);
3541 	for (;;) {
3542 		rv = vmbus_channel_recv(rxr->rxr_chan, rxr->rxr_nvsbuf,
3543 		    HVN_NVS_BUFSIZE, &rlen, &rid, 1);
3544 		if (rv != 0 || rlen == 0) {
3545 			if (rv != EAGAIN)
3546 				device_printf(sc->sc_dev,
3547 				    "failed to receive an NVSP packet\n");
3548 			break;
3549 		}
3550 		cph = (struct vmbus_chanpkt_hdr *)rxr->rxr_nvsbuf;
3551 		nvs = (const struct hvn_nvs_hdr *)VMBUS_CHANPKT_CONST_DATA(cph);
3552 
3553 		if (cph->cph_type == VMBUS_CHANPKT_TYPE_COMP) {
3554 			switch (nvs->nvs_type) {
3555 			case HVN_NVS_TYPE_INIT_RESP:
3556 			case HVN_NVS_TYPE_RXBUF_CONNRESP:
3557 			case HVN_NVS_TYPE_CHIM_CONNRESP:
3558 			case HVN_NVS_TYPE_SUBCH_RESP:
3559 				mutex_enter(&sc->sc_nvsrsp_lock);
3560 				/* copy the response back */
3561 				memcpy(&sc->sc_nvsrsp, nvs, HVN_NVS_MSGSIZE);
3562 				sc->sc_nvsdone = 1;
3563 				cv_signal(&sc->sc_nvsrsp_cv);
3564 				mutex_exit(&sc->sc_nvsrsp_lock);
3565 				break;
3566 			case HVN_NVS_TYPE_RNDIS_ACK:
3567 				if (rxr->rxr_txr == NULL)
3568 					break;
3569 
3570 				result |= HVN_HANDLE_RING_DOTX;
3571 				mutex_enter(&rxr->rxr_txr->txr_lock);
3572 				hvn_txeof(rxr->rxr_txr, cph->cph_tid);
3573 				mutex_exit(&rxr->rxr_txr->txr_lock);
3574 				if (txlimit > 0 && ++tx >= txlimit)
3575 					goto out;
3576 				break;
3577 			default:
3578 				device_printf(sc->sc_dev,
3579 				    "unhandled NVSP packet type %u "
3580 				    "on completion\n", nvs->nvs_type);
3581 				break;
3582 			}
3583 		} else if (cph->cph_type == VMBUS_CHANPKT_TYPE_RXBUF) {
3584 			switch (nvs->nvs_type) {
3585 			case HVN_NVS_TYPE_RNDIS:
3586 				n = hvn_rndis_input(rxr, cph->cph_tid, cph);
3587 				if (rxlimit > 0) {
3588 					if (n < 0)
3589 						goto out;
3590 					rx += n;
3591 					if (rx >= rxlimit)
3592 						goto out;
3593 				}
3594 				break;
3595 			default:
3596 				device_printf(sc->sc_dev,
3597 				    "unhandled NVSP packet type %u "
3598 				    "on receive\n", nvs->nvs_type);
3599 				break;
3600 			}
3601 		} else if (cph->cph_type == VMBUS_CHANPKT_TYPE_INBAND) {
3602 			switch (nvs->nvs_type) {
3603 			case HVN_NVS_TYPE_TXTBL_NOTE:
3604 				/* Useless; ignore */
3605 				break;
3606 			default:
3607 				device_printf(sc->sc_dev,
3608 				    "got notify, nvs type %u\n", nvs->nvs_type);
3609 				break;
3610 			}
3611 		} else
3612 			device_printf(sc->sc_dev,
3613 			    "unknown NVSP packet type %u\n", cph->cph_type);
3614 	}
3615 out:
3616 	mutex_exit(&rxr->rxr_lock);
3617 
3618 	return result;
3619 }
3620 
3621 static void
3622 hvn_nvs_intr1(struct hvn_rx_ring *rxr, int txlimit, int rxlimit)
3623 {
3624 	struct hvn_softc *sc = rxr->rxr_softc;
3625 	struct ifnet *ifp = SC2IFP(sc);
3626 	struct hvn_tx_ring *txr = rxr->rxr_txr;
3627 	int result;
3628 
3629 	rxr->rxr_workqueue = sc->sc_txrx_workqueue;
3630 
3631 	result = hvn_handle_ring(rxr, txlimit, rxlimit);
3632 
3633 	if ((result & HVN_HANDLE_RING_DOTX) && txr != NULL) {
3634 		mutex_enter(&txr->txr_lock);
3635 		/* ALTQ */
3636 		if (txr->txr_id == 0) {
3637 			if_schedule_deferred_start(ifp);
3638 		}
3639 		softint_schedule(txr->txr_si);
3640 		mutex_exit(&txr->txr_lock);
3641 	}
3642 }
3643 
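/*
 * Re-schedule ring processing that could not be finished in the
 * current context: on the workqueue when sc_txrx_workqueue is set
 * (with at most one request outstanding per ring, tracked by
 * rxr_onlist), otherwise on the per-ring softint.  'intr' only
 * selects which event counter records the request: deferred from the
 * interrupt handler, or re-deferred from the workqueue/softint
 * itself.
 */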
3644 static void
3645 hvn_schedule_handle_ring(struct hvn_softc *sc, struct hvn_rx_ring *rxr,
3646     bool intr)
3647 {
3648 
3649 	KASSERT(mutex_owned(&rxr->rxr_onwork_lock));
3650 
3651 	if (rxr->rxr_workqueue) {
3652 		if (!rxr->rxr_onlist) {
3653 			rxr->rxr_onlist = true;
3654 			if (intr)
3655 				rxr->rxr_evdeferreq.ev_count++;
3656 			else
3657 				rxr->rxr_evredeferreq.ev_count++;
3658 			workqueue_enqueue(sc->sc_wq, &rxr->rxr_wk, NULL);
3659 		}
3660 	} else {
3661 		rxr->rxr_onlist = true;
3662 		if (intr)
3663 			rxr->rxr_evdeferreq.ev_count++;
3664 		else
3665 			rxr->rxr_evredeferreq.ev_count++;
3666 		softint_schedule(rxr->rxr_si);
3667 	}
3668 }
3669 
3670 static void
3671 hvn_handle_ring_common(struct hvn_rx_ring *rxr)
3672 {
3673 	struct hvn_softc *sc = rxr->rxr_softc;
3674 	int txlimit = sc->sc_tx_process_limit;
3675 	int rxlimit = sc->sc_rx_process_limit;
3676 
3677 	rxr->rxr_evdefer.ev_count++;
3678 
3679 	mutex_enter(&rxr->rxr_onwork_lock);
3680 	rxr->rxr_onproc = true;
3681 	rxr->rxr_onlist = false;
3682 	mutex_exit(&rxr->rxr_onwork_lock);
3683 
3684 	hvn_nvs_intr1(rxr, txlimit, rxlimit);
3685 
3686 	mutex_enter(&rxr->rxr_onwork_lock);
3687 	if (vmbus_channel_unpause(rxr->rxr_chan)) {
3688 		vmbus_channel_pause(rxr->rxr_chan);
3689 		hvn_schedule_handle_ring(sc, rxr, false);
3690 	}
3691 	rxr->rxr_onproc = false;
3692 	cv_broadcast(&rxr->rxr_onwork_cv);
3693 	mutex_exit(&rxr->rxr_onwork_lock);
3694 }
3695 
3696 static void
3697 hvn_handle_ring_work(struct work *wk, void *arg)
3698 {
3699 	struct hvn_rx_ring *rxr = container_of(wk, struct hvn_rx_ring, rxr_wk);
3700 
3701 	hvn_handle_ring_common(rxr);
3702 }
3703 
3704 static void
3705 hvn_nvs_softintr(void *arg)
3706 {
3707 	struct hvn_rx_ring *rxr = arg;
3708 
3709 	hvn_handle_ring_common(rxr);
3710 }
3711 
3712 static void
3713 hvn_nvs_intr(void *arg)
3714 {
3715 	struct hvn_rx_ring *rxr = arg;
3716 	struct hvn_softc *sc = rxr->rxr_softc;
3717 	int txlimit = cold ? 0 : sc->sc_tx_intr_process_limit;
3718 	int rxlimit = cold ? 0 : sc->sc_rx_intr_process_limit;
3719 
3720 	rxr->rxr_evintr.ev_count++;
3721 
3722 	KASSERT(!rxr->rxr_onproc);
3723 	KASSERT(!rxr->rxr_onlist);
3724 
3725 	vmbus_channel_pause(rxr->rxr_chan);
3726 
3727 	hvn_nvs_intr1(rxr, txlimit, rxlimit);
3728 
3729 	if (vmbus_channel_unpause(rxr->rxr_chan) && !cold) {
3730 		vmbus_channel_pause(rxr->rxr_chan);
3731 		mutex_enter(&rxr->rxr_onwork_lock);
3732 		hvn_schedule_handle_ring(sc, rxr, true);
3733 		mutex_exit(&rxr->rxr_onwork_lock);
3734 	}
3735 }
3736 
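/*
 * Send an NVS control message on the primary channel and, unless
 * HVN_NVS_CMD_NORESP is set, wait for the matching completion.  The
 * wait polls hvn_nvs_intr1() with DELAY() instead of sleeping, so it
 * also works in early boot; the completion handler copies the reply
 * into sc_nvsrsp and sets sc_nvsdone.
 */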
3737 static int
3738 hvn_nvs_cmd(struct hvn_softc *sc, void *cmd, size_t cmdsize, uint64_t tid,
3739     u_int flags)
3740 {
3741 	struct hvn_rx_ring *rxr = &sc->sc_rxr[0];	/* primary channel */
3742 	struct hvn_nvs_hdr *hdr = cmd;
3743 	int tries = 10;
3744 	int rv, s;
3745 
3746 	KASSERT(mutex_owned(&sc->sc_nvsrsp_lock));
3747 
3748 	sc->sc_nvsdone = 0;
3749 
3750 	do {
3751 		rv = vmbus_channel_send(rxr->rxr_chan, cmd, cmdsize,
3752 		    tid, VMBUS_CHANPKT_TYPE_INBAND,
3753 		    ISSET(flags, HVN_NVS_CMD_NORESP) ? 0 :
3754 		      VMBUS_CHANPKT_FLAG_RC);
3755 		if (rv == EAGAIN) {
3756 			DELAY(1000);
3757 		} else if (rv) {
3758 			DPRINTF("%s: NVSP operation %u send error %d\n",
3759 			    device_xname(sc->sc_dev), hdr->nvs_type, rv);
3760 			return rv;
3761 		}
3762 	} while (rv != 0 && --tries > 0);
3763 
3764 	if (tries == 0 && rv != 0) {
3765 		device_printf(sc->sc_dev,
3766 		    "NVSP operation %u send error %d\n", hdr->nvs_type, rv);
3767 		return rv;
3768 	}
3769 
3770 	if (ISSET(flags, HVN_NVS_CMD_NORESP))
3771 		return 0;
3772 
3773 	while (!sc->sc_nvsdone && !ISSET(sc->sc_flags, HVN_SCF_REVOKED)) {
3774 		mutex_exit(&sc->sc_nvsrsp_lock);
3775 		DELAY(1000);
3776 		s = splnet();
3777 		hvn_nvs_intr1(rxr, 0, 0);
3778 		splx(s);
3779 		mutex_enter(&sc->sc_nvsrsp_lock);
3780 	}
3781 
3782 	return 0;
3783 }
3784 
3785 static int
3786 hvn_nvs_ack(struct hvn_rx_ring *rxr, uint64_t tid)
3787 {
3788 	struct hvn_softc *sc __unused = rxr->rxr_softc;
3789 	struct hvn_nvs_rndis_ack cmd;
3790 	int tries = 5;
3791 	int rv;
3792 
3793 	cmd.nvs_type = HVN_NVS_TYPE_RNDIS_ACK;
3794 	cmd.nvs_status = HVN_NVS_STATUS_OK;
3795 	do {
3796 		rv = vmbus_channel_send(rxr->rxr_chan, &cmd, sizeof(cmd),
3797 		    tid, VMBUS_CHANPKT_TYPE_COMP, 0);
3798 		if (rv == EAGAIN)
3799 			DELAY(10);
3800 		else if (rv) {
3801 			DPRINTF("%s: NVSP acknowledgement error %d\n",
3802 			    device_xname(sc->sc_dev), rv);
3803 			return rv;
3804 		}
3805 	} while (rv != 0 && --tries > 0);
3806 	return rv;
3807 }
3808 
3809 static void
3810 hvn_nvs_detach(struct hvn_softc *sc)
3811 {
3812 
3813 	hvn_nvs_disconnect_rxbuf(sc);
3814 	hvn_nvs_disconnect_chim(sc);
3815 }
3816 
3817 static int
3818 hvn_nvs_alloc_subchannels(struct hvn_softc *sc, int *nsubchp)
3819 {
3820 	struct hvn_nvs_subch_req cmd;
3821 	struct hvn_nvs_subch_resp *rsp;
3822 	uint64_t tid;
3823 	int nsubch, nsubch_req;
3824 
3825 	nsubch_req = *nsubchp;
3826 	KASSERTMSG(nsubch_req > 0, "invalid # of sub-channels %d", nsubch_req);
3827 
3828 	memset(&cmd, 0, sizeof(cmd));
3829 	cmd.nvs_type = HVN_NVS_TYPE_SUBCH_REQ;
3830 	cmd.nvs_op = HVN_NVS_SUBCH_OP_ALLOC;
3831 	cmd.nvs_nsubch = nsubch_req;
3832 
3833 	tid = atomic_inc_uint_nv(&sc->sc_nvstid);
3834 	mutex_enter(&sc->sc_nvsrsp_lock);
3835 	if (hvn_nvs_cmd(sc, &cmd, sizeof(cmd), tid, 0)) {
3836 		mutex_exit(&sc->sc_nvsrsp_lock);
3837 		return EIO;
3838 	}
3839 
3840 	rsp = (struct hvn_nvs_subch_resp *)&sc->sc_nvsrsp;
3841 	if (rsp->nvs_status != HVN_NVS_STATUS_OK) {
3842 		mutex_exit(&sc->sc_nvsrsp_lock);
3843 		DPRINTF("%s: failed to alloc sub-channels\n",
3844 		    device_xname(sc->sc_dev));
3845 		return EIO;
3846 	}
3847 
3848 	nsubch = rsp->nvs_nsubch;
3849 	if (nsubch > nsubch_req) {
3850 		aprint_debug_dev(sc->sc_dev,
3851 		    "%u subchans are allocated, requested %d\n",
3852 		    nsubch, nsubch_req);
3853 		nsubch = nsubch_req;
3854 	}
3855 	mutex_exit(&sc->sc_nvsrsp_lock);
3856 
3857 	*nsubchp = nsubch;
3858 
3859 	return 0;
3860 }
3861 
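/*
 * RNDIS control requests cycle through three queues, roughly:
 *
 *	free (sc_cntl_fq) -> submitted (sc_cntl_sq) -> completed
 *	(sc_cntl_cq) -> free
 *
 * via hvn_alloc_cmd/hvn_submit_cmd, hvn_complete_cmd (matched by
 * request id) and hvn_release_cmd/hvn_free_cmd.  hvn_rollback_cmd()
 * pulls a request back off the submitted queue when no response can
 * arrive anymore (send failure or a revoked channel).
 */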
3862 static inline struct rndis_cmd *
3863 hvn_alloc_cmd(struct hvn_softc *sc)
3864 {
3865 	struct rndis_cmd *rc;
3866 
3867 	mutex_enter(&sc->sc_cntl_fqlck);
3868 	while ((rc = TAILQ_FIRST(&sc->sc_cntl_fq)) == NULL)
3869 		cv_wait(&sc->sc_cntl_fqcv, &sc->sc_cntl_fqlck);
3870 	TAILQ_REMOVE(&sc->sc_cntl_fq, rc, rc_entry);
3871 	mutex_exit(&sc->sc_cntl_fqlck);
3872 	return rc;
3873 }
3874 
3875 static inline void
3876 hvn_submit_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3877 {
3878 
3879 	mutex_enter(&sc->sc_cntl_sqlck);
3880 	TAILQ_INSERT_TAIL(&sc->sc_cntl_sq, rc, rc_entry);
3881 	mutex_exit(&sc->sc_cntl_sqlck);
3882 }
3883 
3884 static inline struct rndis_cmd *
3885 hvn_complete_cmd(struct hvn_softc *sc, uint32_t id)
3886 {
3887 	struct rndis_cmd *rc;
3888 
3889 	mutex_enter(&sc->sc_cntl_sqlck);
3890 	TAILQ_FOREACH(rc, &sc->sc_cntl_sq, rc_entry) {
3891 		if (rc->rc_id == id) {
3892 			TAILQ_REMOVE(&sc->sc_cntl_sq, rc, rc_entry);
3893 			break;
3894 		}
3895 	}
3896 	mutex_exit(&sc->sc_cntl_sqlck);
3897 	if (rc != NULL) {
3898 		mutex_enter(&sc->sc_cntl_cqlck);
3899 		TAILQ_INSERT_TAIL(&sc->sc_cntl_cq, rc, rc_entry);
3900 		mutex_exit(&sc->sc_cntl_cqlck);
3901 	}
3902 	return rc;
3903 }
3904 
3905 static inline void
3906 hvn_release_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3907 {
3908 
3909 	mutex_enter(&sc->sc_cntl_cqlck);
3910 	TAILQ_REMOVE(&sc->sc_cntl_cq, rc, rc_entry);
3911 	mutex_exit(&sc->sc_cntl_cqlck);
3912 }
3913 
3914 static inline int
3915 hvn_rollback_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3916 {
3917 	struct rndis_cmd *rn;
3918 
3919 	mutex_enter(&sc->sc_cntl_sqlck);
3920 	TAILQ_FOREACH(rn, &sc->sc_cntl_sq, rc_entry) {
3921 		if (rn == rc) {
3922 			TAILQ_REMOVE(&sc->sc_cntl_sq, rc, rc_entry);
3923 			mutex_exit(&sc->sc_cntl_sqlck);
3924 			return 0;
3925 		}
3926 	}
3927 	mutex_exit(&sc->sc_cntl_sqlck);
3928 	return -1;
3929 }
3930 
3931 static inline void
3932 hvn_free_cmd(struct hvn_softc *sc, struct rndis_cmd *rc)
3933 {
3934 
3935 	memset(rc->rc_req, 0, sizeof(struct rndis_packet_msg));
3936 	memset(&rc->rc_cmp, 0, sizeof(rc->rc_cmp));
3937 	memset(&rc->rc_msg, 0, sizeof(rc->rc_msg));
3938 	mutex_enter(&sc->sc_cntl_fqlck);
3939 	TAILQ_INSERT_TAIL(&sc->sc_cntl_fq, rc, rc_entry);
3940 	cv_signal(&sc->sc_cntl_fqcv);
3941 	mutex_exit(&sc->sc_cntl_fqlck);
3942 }
3943 
3944 static int
3945 hvn_rndis_init(struct hvn_softc *sc)
3946 {
3947 	struct rndis_cmd *rc;
3948 	int i;
3949 
3950 	/* RNDIS control message queues */
3951 	TAILQ_INIT(&sc->sc_cntl_sq);
3952 	TAILQ_INIT(&sc->sc_cntl_cq);
3953 	TAILQ_INIT(&sc->sc_cntl_fq);
3954 	mutex_init(&sc->sc_cntl_sqlck, MUTEX_DEFAULT, IPL_NET);
3955 	mutex_init(&sc->sc_cntl_cqlck, MUTEX_DEFAULT, IPL_NET);
3956 	mutex_init(&sc->sc_cntl_fqlck, MUTEX_DEFAULT, IPL_NET);
3957 	cv_init(&sc->sc_cntl_fqcv, "nvsalloc");
3958 
3959 	for (i = 0; i < HVN_RNDIS_CTLREQS; i++) {
3960 		rc = &sc->sc_cntl_msgs[i];
3961 		if (bus_dmamap_create(sc->sc_dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
3962 		    BUS_DMA_WAITOK, &rc->rc_dmap)) {
3963 			DPRINTF("%s: failed to create RNDIS command map\n",
3964 			    device_xname(sc->sc_dev));
3965 			goto errout;
3966 		}
3967 		if (bus_dmamem_alloc(sc->sc_dmat, PAGE_SIZE, PAGE_SIZE,
3968 		    0, &rc->rc_segs, 1, &rc->rc_nsegs, BUS_DMA_WAITOK)) {
3969 			DPRINTF("%s: failed to allocate RNDIS command\n",
3970 			    device_xname(sc->sc_dev));
3971 			bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
3972 			goto errout;
3973 		}
3974 		if (bus_dmamem_map(sc->sc_dmat, &rc->rc_segs, rc->rc_nsegs,
3975 		    PAGE_SIZE, (void **)&rc->rc_req, BUS_DMA_WAITOK)) {
3976 			DPRINTF("%s: failed to allocate RNDIS command\n",
3977 			    device_xname(sc->sc_dev));
3978 			bus_dmamem_free(sc->sc_dmat, &rc->rc_segs,
3979 			    rc->rc_nsegs);
3980 			bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
3981 			goto errout;
3982 		}
3983 		memset(rc->rc_req, 0, PAGE_SIZE);
3984 		if (bus_dmamap_load(sc->sc_dmat, rc->rc_dmap, rc->rc_req,
3985 		    PAGE_SIZE, NULL, BUS_DMA_WAITOK)) {
3986 			DPRINTF("%s: failed to load RNDIS command map\n",
3987 			    device_xname(sc->sc_dev));
3988 			bus_dmamem_unmap(sc->sc_dmat, rc->rc_req, PAGE_SIZE);
3989 			rc->rc_req = NULL;
3990 			bus_dmamem_free(sc->sc_dmat, &rc->rc_segs,
3991 			    rc->rc_nsegs);
3992 			bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
3993 			goto errout;
3994 		}
3995 		rc->rc_gpa = atop(rc->rc_dmap->dm_segs[0].ds_addr);
3996 		mutex_init(&rc->rc_lock, MUTEX_DEFAULT, IPL_NET);
3997 		cv_init(&rc->rc_cv, "rndiscmd");
3998 		TAILQ_INSERT_TAIL(&sc->sc_cntl_fq, rc, rc_entry);
3999 	}
4000 
4001 	/* Initialize RNDIS Data command */
4002 	memset(&sc->sc_data_msg, 0, sizeof(sc->sc_data_msg));
4003 	sc->sc_data_msg.nvs_type = HVN_NVS_TYPE_RNDIS;
4004 	sc->sc_data_msg.nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_DATA;
4005 	sc->sc_data_msg.nvs_chim_idx = HVN_NVS_CHIM_IDX_INVALID;
4006 
4007 	return 0;
4008 
4009 errout:
4010 	hvn_rndis_destroy(sc);
4011 	return -1;
4012 }
4013 
4014 static void
4015 hvn_rndis_destroy(struct hvn_softc *sc)
4016 {
4017 	struct rndis_cmd *rc;
4018 	int i;
4019 
4020 	for (i = 0; i < HVN_RNDIS_CTLREQS; i++) {
4021 		rc = &sc->sc_cntl_msgs[i];
4022 		if (rc->rc_req == NULL)
4023 			continue;
4024 
4025 		TAILQ_REMOVE(&sc->sc_cntl_fq, rc, rc_entry);
4026 		bus_dmamap_unload(sc->sc_dmat, rc->rc_dmap);
4027 		bus_dmamem_unmap(sc->sc_dmat, rc->rc_req, PAGE_SIZE);
4028 		rc->rc_req = NULL;
4029 		bus_dmamem_free(sc->sc_dmat, &rc->rc_segs, rc->rc_nsegs);
4030 		bus_dmamap_destroy(sc->sc_dmat, rc->rc_dmap);
4031 		mutex_destroy(&rc->rc_lock);
4032 		cv_destroy(&rc->rc_cv);
4033 	}
4034 
4035 	mutex_destroy(&sc->sc_cntl_sqlck);
4036 	mutex_destroy(&sc->sc_cntl_cqlck);
4037 	mutex_destroy(&sc->sc_cntl_fqlck);
4038 	cv_destroy(&sc->sc_cntl_fqcv);
4039 }
4040 
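/*
 * Run the REMOTE_NDIS_INITIALIZE_MSG handshake.  The completion
 * carries the host's RNDIS version and its packet-aggregation limits
 * (size, count, alignment), which the TX path has to respect; an
 * advertised alignment below 4 bytes is rounded up, as the RNDIS
 * packet message encapsulation assumes 4-byte alignment.
 */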
4041 static int
4042 hvn_rndis_attach(struct hvn_softc *sc, int mtu)
4043 {
4044 	struct rndis_init_req *req;
4045 	struct rndis_init_comp *cmp;
4046 	struct rndis_cmd *rc;
4047 	int rv;
4048 
4049 	rc = hvn_alloc_cmd(sc);
4050 
4051 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4052 	    BUS_DMASYNC_PREREAD);
4053 
4054 	rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
4055 
4056 	req = rc->rc_req;
4057 	req->rm_type = REMOTE_NDIS_INITIALIZE_MSG;
4058 	req->rm_len = sizeof(*req);
4059 	req->rm_rid = rc->rc_id;
4060 	req->rm_ver_major = RNDIS_VERSION_MAJOR;
4061 	req->rm_ver_minor = RNDIS_VERSION_MINOR;
4062 	req->rm_max_xfersz = HVN_RNDIS_XFER_SIZE;
4063 
4064 	rc->rc_cmplen = sizeof(*cmp);
4065 
4066 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4067 	    BUS_DMASYNC_PREWRITE);
4068 
4069 	if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
4070 		DPRINTF("%s: INITIALIZE_MSG failed, error %d\n",
4071 		    device_xname(sc->sc_dev), rv);
4072 		hvn_free_cmd(sc, rc);
4073 		return -1;
4074 	}
4075 	cmp = (struct rndis_init_comp *)&rc->rc_cmp;
4076 	if (cmp->rm_status != RNDIS_STATUS_SUCCESS) {
4077 		DPRINTF("%s: failed to init RNDIS, error %#x\n",
4078 		    device_xname(sc->sc_dev), cmp->rm_status);
4079 		hvn_free_cmd(sc, rc);
4080 		return -1;
4081 	}
4082 
4083 	sc->sc_rndis_agg_size = cmp->rm_pktmaxsz;
4084 	sc->sc_rndis_agg_pkts = cmp->rm_pktmaxcnt;
4085 	sc->sc_rndis_agg_align = __BIT(cmp->rm_align);
4086 
4087 	if (sc->sc_rndis_agg_align < sizeof(uint32_t)) {
4088 		/*
4089 		 * The RNDIS packet message encap assumes that the RNDIS
4090 		 * packet message is at least 4 bytes aligned.  Fix up the
4091 		 * alignment here, if the remote side sets the alignment
4092 		 * too low.
4093 		 */
4094 		aprint_verbose_dev(sc->sc_dev,
4095 		    "fixup RNDIS aggpkt align: %u -> %zu\n",
4096 		    sc->sc_rndis_agg_align, sizeof(uint32_t));
4097 		sc->sc_rndis_agg_align = sizeof(uint32_t);
4098 	}
4099 
4100 	aprint_verbose_dev(sc->sc_dev,
4101 	    "RNDIS ver %u.%u, aggpkt size %u, aggpkt cnt %u, aggpkt align %u\n",
4102 	    cmp->rm_ver_major, cmp->rm_ver_minor, sc->sc_rndis_agg_size,
4103 	    sc->sc_rndis_agg_pkts, sc->sc_rndis_agg_align);
4104 
4105 	hvn_free_cmd(sc, rc);
4106 
4107 	return 0;
4108 }
4109 
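/*
 * Query OID_GEN_RECEIVE_SCALE_CAPABILITIES (NDIS 6.20+ only) and
 * distill it into the usable RX ring count (clamped by the RSS
 * indirect table size), a hash function (Toeplitz, in the lowest
 * capability bit, is preferred) and the supported hash types.
 */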
4110 static int
4111 hvn_get_rsscaps(struct hvn_softc *sc, int *nrxr)
4112 {
4113 	struct ndis_rss_caps in, caps;
4114 	size_t caps_len;
4115 	int error, rxr_cnt, indsz, hash_fnidx;
4116 	uint32_t hash_func = 0, hash_types = 0;
4117 
4118 	*nrxr = 0;
4119 
4120 	if (sc->sc_ndisver < NDIS_VERSION_6_20)
4121 		return EOPNOTSUPP;
4122 
4123 	memset(&in, 0, sizeof(in));
4124 	in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS;
4125 	in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2;
4126 	in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE;
4127 
4128 	caps_len = NDIS_RSS_CAPS_SIZE;
4129 	error = hvn_rndis_query2(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES,
4130 	    &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len, NDIS_RSS_CAPS_SIZE_6_0);
4131 	if (error)
4132 		return error;
4133 
4134 	/*
4135 	 * Preliminary verification.
4136 	 */
4137 	if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) {
4138 		DPRINTF("%s: invalid NDIS objtype 0x%02x\n",
4139 		    device_xname(sc->sc_dev), caps.ndis_hdr.ndis_type);
4140 		return EINVAL;
4141 	}
4142 	if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) {
4143 		DPRINTF("%s: invalid NDIS objrev 0x%02x\n",
4144 		    device_xname(sc->sc_dev), caps.ndis_hdr.ndis_rev);
4145 		return EINVAL;
4146 	}
4147 	if (caps.ndis_hdr.ndis_size > caps_len) {
4148 		DPRINTF("%s: invalid NDIS objsize %u, data size %zu\n",
4149 		    device_xname(sc->sc_dev), caps.ndis_hdr.ndis_size,
4150 		    caps_len);
4151 		return EINVAL;
4152 	} else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) {
4153 		DPRINTF("%s: invalid NDIS objsize %u\n",
4154 		    device_xname(sc->sc_dev), caps.ndis_hdr.ndis_size);
4155 		return EINVAL;
4156 	}
4157 
4158 	/*
4159 	 * Save information for later RSS configuration.
4160 	 */
4161 	if (caps.ndis_nrxr == 0) {
4162 		DPRINTF("%s: 0 RX rings!?\n", device_xname(sc->sc_dev));
4163 		return EINVAL;
4164 	}
4165 	rxr_cnt = caps.ndis_nrxr;
4166 	aprint_debug_dev(sc->sc_dev, "%u Rx rings\n", rxr_cnt);
4167 
4168 	if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE &&
4169 	    caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) {
4170 		if (caps.ndis_nind > NDIS_HASH_INDCNT) {
4171 			DPRINTF("%s: too many RSS indirect table entries %u\n",
4172 			    device_xname(sc->sc_dev), caps.ndis_nind);
4173 			return EOPNOTSUPP;
4174 		}
4175 		if (!powerof2(caps.ndis_nind)) {
4176 			DPRINTF("%s: RSS indirect table size is not power-of-2:"
4177 			    " %u\n", device_xname(sc->sc_dev), caps.ndis_nind);
4178 			return EOPNOTSUPP;
4179 		}
4180 
4181 		indsz = caps.ndis_nind;
4182 	} else {
4183 		indsz = NDIS_HASH_INDCNT;
4184 	}
4185 	if (rxr_cnt > indsz) {
4186 		aprint_debug_dev(sc->sc_dev,
4187 		    "# of RX rings (%u) > RSS indirect table size %u\n",
4188 		    rxr_cnt, indsz);
4189 		rxr_cnt = indsz;
4190 	}
4191 
4192 	/*
4193 	 * NOTE:
4194 	 * Toeplitz is at the lowest bit, and it is preferred; so ffs(),
4195 	 * instead of fls(), is used here.
4196 	 */
4197 	hash_fnidx = ffs(caps.ndis_caps & NDIS_RSS_CAP_HASHFUNC_MASK);
4198 	if (hash_fnidx == 0) {
4199 		DPRINTF("%s: no hash functions, caps 0x%08x\n",
4200 		    device_xname(sc->sc_dev), caps.ndis_caps);
4201 		return EOPNOTSUPP;
4202 	}
4203 	hash_func = 1 << (hash_fnidx - 1);	/* ffs is 1-based */
4204 
4205 	if (caps.ndis_caps & NDIS_RSS_CAP_IPV4)
4206 		hash_types |= NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4;
4207 	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6)
4208 		hash_types |= NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6;
4209 	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX)
4210 		hash_types |= NDIS_HASH_IPV6_EX | NDIS_HASH_TCP_IPV6_EX;
4211 	if (hash_types == 0) {
4212 		DPRINTF("%s: no hash types, caps 0x%08x\n",
4213 		    device_xname(sc->sc_dev), caps.ndis_caps);
4214 		return EOPNOTSUPP;
4215 	}
4216 	aprint_debug_dev(sc->sc_dev, "RSS caps %#x\n", caps.ndis_caps);
4217 
4218 	sc->sc_rss_ind_size = indsz;
4219 	sc->sc_rss_hcap = hash_func | hash_types;
4220 	if (sc->sc_caps & HVN_CAPS_UDPHASH) {
4221 		/* UDP 4-tuple hash is unconditionally enabled. */
4222 		sc->sc_rss_hcap |= NDIS_HASH_UDP_IPV4_X;
4223 	}
4224 	*nrxr = rxr_cnt;
4225 
4226 	return 0;
4227 }
4228 
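/*
 * Push the RSS parameters to the host: sc_rss is a single
 * ndis_rssprm_toeplitz blob holding the ndis_rss_params header plus
 * the Toeplitz key and the indirection table, each located by its
 * offset field relative to the start of the structure.  Only the
 * 4-byte indirect table entries of NDIS 6.20+ are generated here.
 */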
4229 static int
4230 hvn_set_rss(struct hvn_softc *sc, uint16_t flags)
4231 {
4232 	struct ndis_rssprm_toeplitz *rss = &sc->sc_rss;
4233 	struct ndis_rss_params *params = &rss->rss_params;
4234 	int len;
4235 
4236 	/*
4237 	 * Only NDIS 6.20+ is supported:
4238 	 * We only support 4bytes element in indirect table, which has been
4239 	 * We only support 4-byte elements in the indirect table, which
4240 	 * have been adopted since NDIS 6.20.
4241 	if (sc->sc_ndisver < NDIS_VERSION_6_20)
4242 		return 0;
4243 
4244 	/* XXX only one can be specified though, popcnt? */
4245 	KASSERTMSG((sc->sc_rss_hash & NDIS_HASH_FUNCTION_MASK),
4246 	    "no hash func %08x", sc->sc_rss_hash);
4247 	KASSERTMSG((sc->sc_rss_hash & NDIS_HASH_STD),
4248 	    "no standard hash types %08x", sc->sc_rss_hash);
4249 	KASSERTMSG(sc->sc_rss_ind_size > 0, "no indirect table size");
4250 
4251 	aprint_debug_dev(sc->sc_dev, "RSS indirect table size %d, hash %#x\n",
4252 	    sc->sc_rss_ind_size, sc->sc_rss_hash);
4253 
4254 	len = NDIS_RSSPRM_TOEPLITZ_SIZE(sc->sc_rss_ind_size);
4255 
4256 	memset(params, 0, sizeof(*params));
4257 	params->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS;
4258 	params->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2;
4259 	params->ndis_hdr.ndis_size = len;
4260 	params->ndis_flags = flags;
4261 	params->ndis_hash =
4262 	    sc->sc_rss_hash & (NDIS_HASH_FUNCTION_MASK | NDIS_HASH_STD);
4263 	params->ndis_indsize = sizeof(rss->rss_ind[0]) * sc->sc_rss_ind_size;
4264 	params->ndis_indoffset =
4265 	    offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]);
4266 	params->ndis_keysize = sizeof(rss->rss_key);
4267 	params->ndis_keyoffset =
4268 	    offsetof(struct ndis_rssprm_toeplitz, rss_key[0]);
4269 
4270 	return hvn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS, rss, len);
4271 }
4272 
4273 static void
4274 hvn_fixup_rss_ind(struct hvn_softc *sc)
4275 {
4276 	struct ndis_rssprm_toeplitz *rss = &sc->sc_rss;
4277 	int i, nchan;
4278 
4279 	nchan = sc->sc_nrxr_inuse;
4280 	KASSERTMSG(nchan > 1, "invalid # of channels %d", nchan);
4281 
4282 	/*
4283 	 * Check indirect table to make sure that all channels in it
4284 	 * can be used.
4285 	 */
4286 	for (i = 0; i < NDIS_HASH_INDCNT; i++) {
4287 		if (rss->rss_ind[i] >= nchan) {
4288 			DPRINTF("%s: RSS indirect table %d fixup: %u -> %d\n",
4289 			    device_xname(sc->sc_dev), i, rss->rss_ind[i],
4290 			    nchan - 1);
4291 			rss->rss_ind[i] = nchan - 1;
4292 		}
4293 	}
4294 }
4295 
4296 static int
4297 hvn_get_hwcaps(struct hvn_softc *sc, struct ndis_offload *caps)
4298 {
4299 	struct ndis_offload in;
4300 	size_t caps_len, len;
4301 	int error;
4302 
4303 	memset(&in, 0, sizeof(in));
4304 	in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD;
4305 	if (sc->sc_ndisver >= NDIS_VERSION_6_30) {
4306 		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3;
4307 		len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE;
4308 	} else if (sc->sc_ndisver >= NDIS_VERSION_6_1) {
4309 		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2;
4310 		len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE_6_1;
4311 	} else {
4312 		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1;
4313 		len = in.ndis_hdr.ndis_size = NDIS_OFFLOAD_SIZE_6_0;
4314 	}
4315 
4316 	caps_len = NDIS_OFFLOAD_SIZE;
4317 	error = hvn_rndis_query2(sc, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
4318 	    &in, len, caps, &caps_len, NDIS_OFFLOAD_SIZE_6_0);
4319 	if (error)
4320 		return error;
4321 
4322 	/*
4323 	 * Preliminary verification.
4324 	 */
4325 	if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) {
4326 		DPRINTF("%s: invalid NDIS objtype 0x%02x\n",
4327 		    device_xname(sc->sc_dev), caps->ndis_hdr.ndis_type);
4328 		return EINVAL;
4329 	}
4330 	if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) {
4331 		DPRINTF("%s: invalid NDIS objrev 0x%02x\n",
4332 		    device_xname(sc->sc_dev), caps->ndis_hdr.ndis_rev);
4333 		return EINVAL;
4334 	}
4335 	if (caps->ndis_hdr.ndis_size > caps_len) {
4336 		DPRINTF("%s: invalid NDIS objsize %u, data size %zu\n",
4337 		    device_xname(sc->sc_dev), caps->ndis_hdr.ndis_size,
4338 		    caps_len);
4339 		return EINVAL;
4340 	} else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) {
4341 		DPRINTF("%s: invalid NDIS objsize %u\n",
4342 		    device_xname(sc->sc_dev), caps->ndis_hdr.ndis_size);
4343 		return EINVAL;
4344 	}
4345 
4346 	/*
4347 	 * NOTE:
4348 	 * caps->ndis_hdr.ndis_size MUST be checked before accessing
4349 	 * NDIS 6.1+ specific fields.
4350 	 */
4351 	aprint_debug_dev(sc->sc_dev, "hwcaps rev %u\n",
4352 	    caps->ndis_hdr.ndis_rev);
4353 
4354 	aprint_debug_dev(sc->sc_dev, "hwcaps csum: "
4355 	    "ip4 tx 0x%x/0x%x rx 0x%x/0x%x, "
4356 	    "ip6 tx 0x%x/0x%x rx 0x%x/0x%x\n",
4357 	    caps->ndis_csum.ndis_ip4_txcsum, caps->ndis_csum.ndis_ip4_txenc,
4358 	    caps->ndis_csum.ndis_ip4_rxcsum, caps->ndis_csum.ndis_ip4_rxenc,
4359 	    caps->ndis_csum.ndis_ip6_txcsum, caps->ndis_csum.ndis_ip6_txenc,
4360 	    caps->ndis_csum.ndis_ip6_rxcsum, caps->ndis_csum.ndis_ip6_rxenc);
4361 	aprint_debug_dev(sc->sc_dev, "hwcaps lsov2: "
4362 	    "ip4 maxsz %u minsg %u encap 0x%x, "
4363 	    "ip6 maxsz %u minsg %u encap 0x%x opts 0x%x\n",
4364 	    caps->ndis_lsov2.ndis_ip4_maxsz, caps->ndis_lsov2.ndis_ip4_minsg,
4365 	    caps->ndis_lsov2.ndis_ip4_encap, caps->ndis_lsov2.ndis_ip6_maxsz,
4366 	    caps->ndis_lsov2.ndis_ip6_minsg, caps->ndis_lsov2.ndis_ip6_encap,
4367 	    caps->ndis_lsov2.ndis_ip6_opts);
4368 
4369 	return 0;
4370 }
4371 
4372 static int
4373 hvn_set_capabilities(struct hvn_softc *sc, int mtu)
4374 {
4375 	struct ndis_offload hwcaps;
4376 	struct ndis_offload_params params;
4377 	size_t len;
4378 	uint32_t caps = 0;
4379 	int error, tso_maxsz, tso_minsg;
4380 
4381 	error = hvn_get_hwcaps(sc, &hwcaps);
4382 	if (error) {
4383 		DPRINTF("%s: failed to query hwcaps\n",
4384 		    device_xname(sc->sc_dev));
4385 		return error;
4386 	}
4387 
4388 	/* NOTE: 0 means "no change" */
4389 	memset(&params, 0, sizeof(params));
4390 
4391 	params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT;
4392 	if (sc->sc_ndisver < NDIS_VERSION_6_30) {
4393 		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2;
4394 		len = params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE_6_1;
4395 	} else {
4396 		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3;
4397 		len = params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE;
4398 	}
4399 
4400 	/*
4401 	 * TSO4/TSO6 setup.
4402 	 */
4403 	tso_maxsz = IP_MAXPACKET;
4404 	tso_minsg = 2;
4405 	if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) {
4406 		caps |= HVN_CAPS_TSO4;
4407 		params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON;
4408 
4409 		if (hwcaps.ndis_lsov2.ndis_ip4_maxsz < tso_maxsz)
4410 			tso_maxsz = hwcaps.ndis_lsov2.ndis_ip4_maxsz;
4411 		if (hwcaps.ndis_lsov2.ndis_ip4_minsg > tso_minsg)
4412 			tso_minsg = hwcaps.ndis_lsov2.ndis_ip4_minsg;
4413 	}
4414 	if ((hwcaps.ndis_lsov2.ndis_ip6_encap & NDIS_OFFLOAD_ENCAP_8023) &&
4415 	    (hwcaps.ndis_lsov2.ndis_ip6_opts & HVN_NDIS_LSOV2_CAP_IP6) ==
4416 	    HVN_NDIS_LSOV2_CAP_IP6) {
4417 		caps |= HVN_CAPS_TSO6;
4418 		params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON;
4419 
4420 		if (hwcaps.ndis_lsov2.ndis_ip6_maxsz < tso_maxsz)
4421 			tso_maxsz = hwcaps.ndis_lsov2.ndis_ip6_maxsz;
4422 		if (hwcaps.ndis_lsov2.ndis_ip6_minsg > tso_minsg)
4423 			tso_minsg = hwcaps.ndis_lsov2.ndis_ip6_minsg;
4424 	}
4425 	sc->sc_tso_szmax = 0;
4426 	sc->sc_tso_sgmin = 0;
4427 	if (caps & (HVN_CAPS_TSO4 | HVN_CAPS_TSO6)) {
4428 		KASSERTMSG(tso_maxsz <= IP_MAXPACKET,
4429 		    "invalid NDIS TSO maxsz %d", tso_maxsz);
4430 		KASSERTMSG(tso_minsg >= 2,
4431 		    "invalid NDIS TSO minsg %d", tso_minsg);
4432 		if (tso_maxsz < tso_minsg * mtu) {
4433 			DPRINTF("%s: invalid NDIS TSO config: "
4434 			    "maxsz %d, minsg %d, mtu %d; "
4435 			    "disable TSO4 and TSO6\n", device_xname(sc->sc_dev),
4436 			    tso_maxsz, tso_minsg, mtu);
4437 			caps &= ~(HVN_CAPS_TSO4 | HVN_CAPS_TSO6);
4438 			params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_OFF;
4439 			params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_OFF;
4440 		} else {
4441 			sc->sc_tso_szmax = tso_maxsz;
4442 			sc->sc_tso_sgmin = tso_minsg;
4443 			aprint_debug_dev(sc->sc_dev,
4444 			    "NDIS TSO szmax %d sgmin %d\n",
4445 			    sc->sc_tso_szmax, sc->sc_tso_sgmin);
4446 		}
4447 	}
4448 
4449 	/* IPv4 checksum */
4450 	if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HVN_NDIS_TXCSUM_CAP_IP4) ==
4451 	    HVN_NDIS_TXCSUM_CAP_IP4) {
4452 		caps |= HVN_CAPS_IPCS;
4453 		params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX;
4454 	}
4455 	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) {
4456 		if (params.ndis_ip4csum == NDIS_OFFLOAD_PARAM_TX)
4457 			params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX;
4458 		else
4459 			params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_RX;
4460 	}
4461 
4462 	/* TCP4 checksum */
4463 	if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HVN_NDIS_TXCSUM_CAP_TCP4) ==
4464 	    HVN_NDIS_TXCSUM_CAP_TCP4) {
4465 		caps |= HVN_CAPS_TCP4CS;
4466 		params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX;
4467 	}
4468 	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) {
4469 		if (params.ndis_tcp4csum == NDIS_OFFLOAD_PARAM_TX)
4470 			params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX;
4471 		else
4472 			params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_RX;
4473 	}
4474 
4475 	/* UDP4 checksum */
4476 	if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
4477 		caps |= HVN_CAPS_UDP4CS;
4478 		params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX;
4479 	}
4480 	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) {
4481 		if (params.ndis_udp4csum == NDIS_OFFLOAD_PARAM_TX)
4482 			params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX;
4483 		else
4484 			params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_RX;
4485 	}
4486 
4487 	/* TCP6 checksum */
4488 	if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HVN_NDIS_TXCSUM_CAP_TCP6) ==
4489 	    HVN_NDIS_TXCSUM_CAP_TCP6) {
4490 		caps |= HVN_CAPS_TCP6CS;
4491 		params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX;
4492 	}
4493 	if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) {
4494 		if (params.ndis_tcp6csum == NDIS_OFFLOAD_PARAM_TX)
4495 			params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX;
4496 		else
4497 			params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_RX;
4498 	}
4499 
4500 	/* UDP6 checksum */
4501 	if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HVN_NDIS_TXCSUM_CAP_UDP6) ==
4502 	    HVN_NDIS_TXCSUM_CAP_UDP6) {
4503 		caps |= HVN_CAPS_UDP6CS;
4504 		params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX;
4505 	}
4506 	if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) {
4507 		if (params.ndis_udp6csum == NDIS_OFFLOAD_PARAM_TX)
4508 			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX;
4509 		else
4510 			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX;
4511 	}
4512 
4513 	aprint_debug_dev(sc->sc_dev, "offload csum: "
4514 	    "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n",
4515 	    params.ndis_ip4csum, params.ndis_tcp4csum, params.ndis_udp4csum,
4516 	    params.ndis_tcp6csum, params.ndis_udp6csum);
4517 	aprint_debug_dev(sc->sc_dev, "offload lsov2: ip4 %u, ip6 %u\n",
4518 	    params.ndis_lsov2_ip4, params.ndis_lsov2_ip6);
4519 
4520 	error = hvn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, &params, len);
4521 	if (error) {
4522 		DPRINTF("%s: offload config failed: %d\n",
4523 		    device_xname(sc->sc_dev), error);
4524 		return error;
4525 	}
4526 
4527 	aprint_debug_dev(sc->sc_dev, "offload config done\n");
4528 	sc->sc_caps |= caps;
4529 
4530 	return 0;
4531 }
4532 
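/*
 * Transmit one RNDIS control request.  The request page is handed to
 * the host by guest physical address (rc_gpa) and the completion is
 * polled for via hvn_nvs_intr1(), the same way hvn_nvs_cmd() waits.
 * On a revoked channel, or with HVN_RNDIS_CMD_NORESP, the request is
 * rolled back (or released, if it already completed) without waiting.
 */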
4533 static int
4534 hvn_rndis_cmd(struct hvn_softc *sc, struct rndis_cmd *rc, u_int flags)
4535 {
4536 	struct hvn_rx_ring *rxr = &sc->sc_rxr[0];	/* primary channel */
4537 	struct hvn_nvs_rndis *msg = &rc->rc_msg;
4538 	struct rndis_msghdr *hdr = rc->rc_req;
4539 	struct vmbus_gpa sgl[1];
4540 	int tries = 10;
4541 	int rv, s;
4542 
4543 	msg->nvs_type = HVN_NVS_TYPE_RNDIS;
4544 	msg->nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_CTRL;
4545 	msg->nvs_chim_idx = HVN_NVS_CHIM_IDX_INVALID;
4546 
4547 	sgl[0].gpa_page = rc->rc_gpa;
4548 	sgl[0].gpa_len = hdr->rm_len;
4549 	sgl[0].gpa_ofs = 0;
4550 
4551 	rc->rc_done = 0;
4552 
4553 	mutex_enter(&rc->rc_lock);
4554 
4555 	hvn_submit_cmd(sc, rc);
4556 
4557 	do {
4558 		rv = vmbus_channel_send_sgl(rxr->rxr_chan, sgl, 1, &rc->rc_msg,
4559 		    sizeof(*msg), rc->rc_id);
4560 		if (rv == EAGAIN) {
4561 			DELAY(1000);
4562 		} else if (rv) {
4563 			mutex_exit(&rc->rc_lock);
4564 			DPRINTF("%s: RNDIS operation %u send error %d\n",
4565 			    device_xname(sc->sc_dev), hdr->rm_type, rv);
4566 			hvn_rollback_cmd(sc, rc);
4567 			return rv;
4568 		}
4569 	} while (rv != 0 && --tries > 0);
4570 
4571 	if (tries == 0 && rv != 0) {
4572 		mutex_exit(&rc->rc_lock);
4573 		device_printf(sc->sc_dev,
4574 		    "RNDIS operation %u send error %d\n", hdr->rm_type, rv);
4575 		hvn_rollback_cmd(sc, rc);
4576 		return rv;
4577 	}
4578 	if (vmbus_channel_is_revoked(rxr->rxr_chan) ||
4579 	    ISSET(flags, HVN_RNDIS_CMD_NORESP)) {
4580 		/* No response */
4581 		mutex_exit(&rc->rc_lock);
4582 		if (hvn_rollback_cmd(sc, rc))
4583 			hvn_release_cmd(sc, rc);
4584 		return 0;
4585 	}
4586 
4587 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4588 	    BUS_DMASYNC_POSTWRITE);
4589 
4590 	while (!rc->rc_done && !ISSET(sc->sc_flags, HVN_SCF_REVOKED)) {
4591 		mutex_exit(&rc->rc_lock);
4592 		DELAY(1000);
4593 		s = splnet();
4594 		hvn_nvs_intr1(rxr, 0, 0);
4595 		splx(s);
4596 		mutex_enter(&rc->rc_lock);
4597 	}
4598 	mutex_exit(&rc->rc_lock);
4599 
4600 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4601 	    BUS_DMASYNC_POSTREAD);
4602 
4603 	if (!rc->rc_done) {
4604 		rv = EINTR;
4605 		if (hvn_rollback_cmd(sc, rc)) {
4606 			hvn_release_cmd(sc, rc);
4607 			rv = 0;
4608 		}
4609 		return rv;
4610 	}
4611 
4612 	hvn_release_cmd(sc, rc);
4613 	return 0;
4614 }
4615 
4616 static int
4617 hvn_rndis_input(struct hvn_rx_ring *rxr, uint64_t tid, void *arg)
4618 {
4619 	struct hvn_softc *sc = rxr->rxr_softc;
4620 	struct vmbus_chanpkt_prplist *cp = arg;
4621 	uint32_t off, len, type;
4622 	int i, rv, rx = 0;
4623 	bool qfull = false;
4624 
4625 	if (sc->sc_rx_ring == NULL) {
4626 		DPRINTF("%s: invalid rx ring\n", device_xname(sc->sc_dev));
4627 		return 0;
4628 	}
4629 
4630 	for (i = 0; i < cp->cp_range_cnt; i++) {
4631 		off = cp->cp_range[i].gpa_ofs;
4632 		len = cp->cp_range[i].gpa_len;
4633 
4634 		KASSERT(off + len <= sc->sc_rx_size);
4635 		KASSERT(len >= RNDIS_HEADER_OFFSET + 4);
4636 
4637 		memcpy(&type, sc->sc_rx_ring + off, sizeof(type));
4638 		switch (type) {
4639 		/* data message */
4640 		case REMOTE_NDIS_PACKET_MSG:
4641 			rv = hvn_rxeof(rxr, sc->sc_rx_ring + off, len);
4642 			if (rv == 1)
4643 				rx++;
4644 			else if (rv == -1)	/* The receive queue is full. */
4645 				qfull = true;
4646 			break;
4647 		/* completion messages */
4648 		case REMOTE_NDIS_INITIALIZE_CMPLT:
4649 		case REMOTE_NDIS_QUERY_CMPLT:
4650 		case REMOTE_NDIS_SET_CMPLT:
4651 		case REMOTE_NDIS_RESET_CMPLT:
4652 		case REMOTE_NDIS_KEEPALIVE_CMPLT:
4653 			hvn_rndis_complete(sc, sc->sc_rx_ring + off, len);
4654 			break;
4655 		/* notification message */
4656 		case REMOTE_NDIS_INDICATE_STATUS_MSG:
4657 			hvn_rndis_status(sc, sc->sc_rx_ring + off, len);
4658 			break;
4659 		default:
4660 			device_printf(sc->sc_dev,
4661 			    "unhandled RNDIS message type %u\n", type);
4662 			break;
4663 		}
4664 	}
4665 
4666 	hvn_nvs_ack(rxr, tid);
4667 
4668 	if (qfull)
4669 		return -1;
4670 	return rx;
4671 }
4672 
4673 static inline struct mbuf *
4674 hvn_devget(struct hvn_softc *sc, void *buf, uint32_t len)
4675 {
4676 	struct ifnet *ifp = SC2IFP(sc);
4677 	struct mbuf *m;
4678 	size_t size = len + ETHER_ALIGN + ETHER_VLAN_ENCAP_LEN;
4679 
4680 	MGETHDR(m, M_NOWAIT, MT_DATA);
4681 	if (m == NULL)
4682 		return NULL;
4683 
4684 	if (size > MHLEN) {
4685 		if (size <= MCLBYTES)
4686 			MCLGET(m, M_NOWAIT);
4687 		else
4688 			MEXTMALLOC(m, size, M_NOWAIT);
4689 		if ((m->m_flags & M_EXT) == 0) {
4690 			m_freem(m);
4691 			return NULL;
4692 		}
4693 	}
4694 
4695 	m->m_len = m->m_pkthdr.len = size;
4696 	m_adj(m, ETHER_ALIGN + ETHER_VLAN_ENCAP_LEN);
4697 	m_copyback(m, 0, len, buf);
4698 	m_set_rcvif(m, ifp);
4699 	return m;
4700 }
4701 
4702 #define HVN_RXINFO_CSUM		__BIT(NDIS_PKTINFO_TYPE_CSUM)
4703 #define HVN_RXINFO_VLAN		__BIT(NDIS_PKTINFO_TYPE_VLAN)
4704 #define HVN_RXINFO_HASHVAL	__BIT(HVN_NDIS_PKTINFO_TYPE_HASHVAL)
4705 #define HVN_RXINFO_HASHINFO	__BIT(HVN_NDIS_PKTINFO_TYPE_HASHINF)
4706 #define HVN_RXINFO_ALL		(HVN_RXINFO_CSUM | \
4707 				 HVN_RXINFO_VLAN | \
4708 				 HVN_RXINFO_HASHVAL | \
4709 				 HVN_RXINFO_HASHINFO)
4710 
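/*
 * Take one REMOTE_NDIS_PACKET_MSG apart: copy the payload into an
 * mbuf, then walk the trailing per-packet info (rndis_pktinfo)
 * elements for checksum, VLAN and RSS hash metadata.  Each recognized
 * type sets a bit in `mask', so the walk can stop early once all of
 * HVN_RXINFO_ALL has been seen.
 */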
4711 static int
4712 hvn_rxeof(struct hvn_rx_ring *rxr, uint8_t *buf, uint32_t len)
4713 {
4714 	struct hvn_softc *sc = rxr->rxr_softc;
4715 	struct ifnet *ifp = SC2IFP(sc);
4716 	struct rndis_packet_msg *pkt;
4717 	struct rndis_pktinfo *pi;
4718 	struct mbuf *m;
4719 	uint32_t mask, csum, vlan, hashval, hashinfo;
4720 
4721 	if (!(ifp->if_flags & IFF_RUNNING))
4722 		return 0;
4723 
4724 	if (len < sizeof(*pkt)) {
4725 		device_printf(sc->sc_dev, "data packet too short: %u\n",
4726 		    len);
4727 		return 0;
4728 	}
4729 
4730 	pkt = (struct rndis_packet_msg *)buf;
4731 	if (pkt->rm_dataoffset + pkt->rm_datalen > len) {
4732 		device_printf(sc->sc_dev,
4733 		    "data packet out of bounds: %u@%u\n", pkt->rm_datalen,
4734 		    pkt->rm_dataoffset);
4735 		return 0;
4736 	}
4737 
4738 	if ((m = hvn_devget(sc, buf + RNDIS_HEADER_OFFSET + pkt->rm_dataoffset,
4739 	    pkt->rm_datalen)) == NULL) {
4740 		if_statinc(ifp, if_ierrors);
4741 		return 0;
4742 	}
4743 
4744 	if (pkt->rm_pktinfooffset + pkt->rm_pktinfolen > len) {
4745 		device_printf(sc->sc_dev,
4746 		    "pktinfo is out of bounds: %u@%u vs %u\n",
4747 		    pkt->rm_pktinfolen, pkt->rm_pktinfooffset, len);
4748 		goto done;
4749 	}
4750 
4751 	mask = csum = hashval = hashinfo = 0;
4752 	vlan = 0xffffffff;
4753 	pi = (struct rndis_pktinfo *)(buf + RNDIS_HEADER_OFFSET +
4754 	    pkt->rm_pktinfooffset);
4755 	while (pkt->rm_pktinfolen > 0) {
4756 		if (pi->rm_size > pkt->rm_pktinfolen) {
4757 			device_printf(sc->sc_dev,
4758 			    "invalid pktinfo size: %u/%u\n", pi->rm_size,
4759 			    pkt->rm_pktinfolen);
4760 			break;
4761 		}
4762 
4763 		switch (pi->rm_type) {
4764 		case NDIS_PKTINFO_TYPE_CSUM:
4765 			memcpy(&csum, pi->rm_data, sizeof(csum));
4766 			SET(mask, HVN_RXINFO_CSUM);
4767 			break;
4768 		case NDIS_PKTINFO_TYPE_VLAN:
4769 			memcpy(&vlan, pi->rm_data, sizeof(vlan));
4770 			SET(mask, HVN_RXINFO_VLAN);
4771 			break;
4772 		case HVN_NDIS_PKTINFO_TYPE_HASHVAL:
4773 			memcpy(&hashval, pi->rm_data, sizeof(hashval));
4774 			SET(mask, HVN_RXINFO_HASHVAL);
4775 			break;
4776 		case HVN_NDIS_PKTINFO_TYPE_HASHINF:
4777 			memcpy(&hashinfo, pi->rm_data, sizeof(hashinfo));
4778 			SET(mask, HVN_RXINFO_HASHINFO);
4779 			break;
4780 		default:
4781 			DPRINTF("%s: unhandled pktinfo type %u\n",
4782 			    device_xname(sc->sc_dev), pi->rm_type);
4783 			goto next;
4784 		}
4785 
4786 		if (mask == HVN_RXINFO_ALL) {
4787 			/* All found; done */
4788 			break;
4789 		}
4790  next:
4791 		pkt->rm_pktinfolen -= pi->rm_size;
4792 		pi = (struct rndis_pktinfo *)((char *)pi + pi->rm_size);
4793 	}
4794 
4795 	/*
4796 	 * Final fixup.
4797 	 * - If there is no hash value, invalidate the hash info.
4798 	 */
4799 	if (!ISSET(mask, HVN_RXINFO_HASHVAL))
4800 		hashinfo = 0;
4801 
4802 	if (csum != 0) {
4803 		if (ISSET(csum, NDIS_RXCSUM_INFO_IPCS_OK) &&
4804 			ISSET(ifp->if_csum_flags_rx, M_CSUM_IPv4)) {
4805 			SET(m->m_pkthdr.csum_flags, M_CSUM_IPv4);
4806 			rxr->rxr_evcsum_ip.ev_count++;
4807 		}
4808 		if (ISSET(csum, NDIS_RXCSUM_INFO_TCPCS_OK) &&
4809 			ISSET(ifp->if_csum_flags_rx, M_CSUM_TCPv4)) {
4810 			SET(m->m_pkthdr.csum_flags, M_CSUM_TCPv4);
4811 			rxr->rxr_evcsum_tcp.ev_count++;
4812 		}
4813 		if (ISSET(csum, NDIS_RXCSUM_INFO_UDPCS_OK) &&
4814 			ISSET(ifp->if_csum_flags_rx, M_CSUM_UDPv4)) {
4815 			SET(m->m_pkthdr.csum_flags, M_CSUM_UDPv4);
4816 			rxr->rxr_evcsum_udp.ev_count++;
4817 		}
4818 	}
4819 
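	/*
	 * Rebuild the 802.1Q TCI from the NDIS VLAN info: PRI into bits
	 * 15-13, CFI/DEI into bit 12, the VLAN ID into bits 11-0.  Hand
	 * it to the stack as a tag when hardware tagging is enabled,
	 * otherwise re-insert it into the frame itself.
	 */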
4820 	if (vlan != 0xffffffff) {
4821 		uint16_t t = NDIS_VLAN_INFO_ID(vlan);
4822 		t |= NDIS_VLAN_INFO_PRI(vlan) << EVL_PRIO_BITS;
4823 		t |= NDIS_VLAN_INFO_CFI(vlan) << EVL_CFI_BITS;
4824 
4825 		if (ISSET(sc->sc_ec.ec_capenable, ETHERCAP_VLAN_HWTAGGING)) {
4826 			vlan_set_tag(m, t);
4827 			rxr->rxr_evvlanhwtagging.ev_count++;
4828 		} else {
4829 			struct ether_header eh;
4830 			struct ether_vlan_header *evl;
4831 
4832 			KDASSERT(m->m_pkthdr.len >= sizeof(eh));
4833 			m_copydata(m, 0, sizeof(eh), &eh);
4834 			M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
4835 			M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
			if (m == NULL) {
				/* M_PREPEND frees the chain on failure. */
				if_statinc(ifp, if_ierrors);
				return 0;
			}
4837 			evl = mtod(m, struct ether_vlan_header *);
4838 			memcpy(evl->evl_dhost, eh.ether_dhost,
4839 			    ETHER_ADDR_LEN * 2);
4840 			evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
4841 			evl->evl_tag = htons(t);
4842 			evl->evl_proto = eh.ether_type;
4843 		}
4844 	}
4845 
4846 	/* XXX RSS hash is not supported. */
4847 
4848  done:
4849 	rxr->rxr_evpkts.ev_count++;
4850 	if_percpuq_enqueue(sc->sc_ipq, m);
4851 	/* XXX Unable to detect that the receive queue is full. */
4852 	return 1;
4853 }
4854 
4855 static void
4856 hvn_rndis_complete(struct hvn_softc *sc, uint8_t *buf, uint32_t len)
4857 {
4858 	struct rndis_cmd *rc;
4859 	uint32_t id;
4860 
4861 	memcpy(&id, buf + RNDIS_HEADER_OFFSET, sizeof(id));
4862 	if ((rc = hvn_complete_cmd(sc, id)) != NULL) {
4863 		mutex_enter(&rc->rc_lock);
4864 		if (len < rc->rc_cmplen)
4865 			device_printf(sc->sc_dev,
4866 			    "RNDIS response %u too short: %u\n", id, len);
4867 		else
4868 			memcpy(&rc->rc_cmp, buf, rc->rc_cmplen);
4869 		if (len > rc->rc_cmplen &&
4870 		    len - rc->rc_cmplen > HVN_RNDIS_BUFSIZE)
4871 			device_printf(sc->sc_dev,
4872 			    "RNDIS response %u too large: %u\n", id, len);
4873 		else if (len > rc->rc_cmplen)
4874 			memcpy(&rc->rc_cmpbuf, buf + rc->rc_cmplen,
4875 			    len - rc->rc_cmplen);
4876 		rc->rc_done = 1;
4877 		cv_signal(&rc->rc_cv);
4878 		mutex_exit(&rc->rc_lock);
4879 	} else {
4880 		DPRINTF("%s: failed to complete RNDIS request id %u\n",
4881 		    device_xname(sc->sc_dev), id);
4882 	}
4883 }
4884 
4885 static int
4886 hvn_rndis_output_sgl(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
4887 {
4888 	struct hvn_softc *sc = txr->txr_softc;
4889 	uint64_t rid = (uint64_t)txd->txd_id << 32;
4890 	int rv;
4891 
4892 	rv = vmbus_channel_send_sgl(txr->txr_chan, txd->txd_sgl, txd->txd_nsge,
4893 	    &sc->sc_data_msg, sizeof(sc->sc_data_msg), rid);
4894 	if (rv) {
4895 		DPRINTF("%s: RNDIS data send error %d\n",
4896 		    device_xname(sc->sc_dev), rv);
4897 		return rv;
4898 	}
4899 	return 0;
4900 }
4901 
4902 static int
4903 hvn_rndis_output_chim(struct hvn_tx_ring *txr, struct hvn_tx_desc *txd)
4904 {
	struct hvn_softc *sc __unused = txr->txr_softc;	/* for DPRINTF below */
4905 	struct hvn_nvs_rndis rndis;
4906 	uint64_t rid = (uint64_t)txd->txd_id << 32;
4907 	int rv;
4908 
4909 	memset(&rndis, 0, sizeof(rndis));
4910 	rndis.nvs_type = HVN_NVS_TYPE_RNDIS;
4911 	rndis.nvs_rndis_mtype = HVN_NVS_RNDIS_MTYPE_DATA;
4912 	rndis.nvs_chim_idx = txd->txd_chim_index;
4913 	rndis.nvs_chim_sz = txd->txd_chim_size;
4914 
4915 	rv = vmbus_channel_send(txr->txr_chan, &rndis, sizeof(rndis),
4916 	    rid, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC);
4917 	if (rv) {
4918 		DPRINTF("%s: RNDIS chimney data send error %d: idx %u, sz %u\n",
4919 		    device_xname(sc->sc_dev), rv, rndis.nvs_chim_idx,
4920 		    rndis.nvs_chim_sz);
4921 		return rv;
4922 	}
4923 	return 0;
4924 }
4925 
4926 static void
4927 hvn_rndis_status(struct hvn_softc *sc, uint8_t *buf, uint32_t len)
4928 {
4929 	uint32_t status;
4930 
4931 	memcpy(&status, buf + RNDIS_HEADER_OFFSET, sizeof(status));
4932 	switch (status) {
4933 	case RNDIS_STATUS_MEDIA_CONNECT:
4934 	case RNDIS_STATUS_MEDIA_DISCONNECT:
4935 		hvn_link_event(sc, HVN_LINK_EV_STATE_CHANGE);
4936 		break;
4937 	case RNDIS_STATUS_NETWORK_CHANGE:
4938 		hvn_link_event(sc, HVN_LINK_EV_NETWORK_CHANGE);
4939 		break;
4940 	/* Ignore these */
4941 	case RNDIS_STATUS_OFFLOAD_CURRENT_CONFIG:
4942 	case RNDIS_STATUS_LINK_SPEED_CHANGE:
4943 		return;
4944 	default:
4945 		DPRINTF("%s: unhandled status %#x\n", device_xname(sc->sc_dev),
4946 		    status);
4947 		return;
4948 	}
4949 }
4950 
4951 static int
4952 hvn_rndis_query(struct hvn_softc *sc, uint32_t oid, void *res, size_t *length)
4953 {
4954 
4955 	return hvn_rndis_query2(sc, oid, NULL, 0, res, length, 0);
4956 }
4957 
4958 static int
4959 hvn_rndis_query2(struct hvn_softc *sc, uint32_t oid, const void *idata,
4960     size_t idlen, void *odata, size_t *odlen, size_t min_odlen)
4961 {
4962 	struct rndis_cmd *rc;
4963 	struct rndis_query_req *req;
4964 	struct rndis_query_comp *cmp;
4965 	size_t olength = *odlen;
4966 	int rv;
4967 
4968 	rc = hvn_alloc_cmd(sc);
4969 
4970 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4971 	    BUS_DMASYNC_PREREAD);
4972 
4973 	rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
4974 
4975 	req = rc->rc_req;
4976 	req->rm_type = REMOTE_NDIS_QUERY_MSG;
4977 	req->rm_len = sizeof(*req) + idlen;
4978 	req->rm_rid = rc->rc_id;
4979 	req->rm_oid = oid;
4980 	req->rm_infobufoffset = sizeof(*req) - RNDIS_HEADER_OFFSET;
4981 	if (idlen > 0) {
4982 		KASSERT(sizeof(*req) + idlen <= PAGE_SIZE);
4983 		req->rm_infobuflen = idlen;
4984 		memcpy(req + 1, idata, idlen);
4985 	}
4986 
4987 	rc->rc_cmplen = sizeof(*cmp);
4988 
4989 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
4990 	    BUS_DMASYNC_PREWRITE);
4991 
4992 	if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
4993 		DPRINTF("%s: QUERY_MSG failed, error %d\n",
4994 		    device_xname(sc->sc_dev), rv);
4995 		hvn_free_cmd(sc, rc);
4996 		return rv;
4997 	}
4998 
4999 	cmp = (struct rndis_query_comp *)&rc->rc_cmp;
5000 	switch (cmp->rm_status) {
5001 	case RNDIS_STATUS_SUCCESS:
5002 		if (cmp->rm_infobuflen > olength ||
5003 		    (min_odlen > 0 && cmp->rm_infobuflen < min_odlen)) {
5004 			rv = EINVAL;
5005 			break;
5006 		}
5007 		memcpy(odata, rc->rc_cmpbuf, cmp->rm_infobuflen);
5008 		*odlen = cmp->rm_infobuflen;
5009 		break;
5010 	default:
5011 		*odlen = 0;
5012 		rv = EIO;
5013 		break;
5014 	}
5015 
5016 	hvn_free_cmd(sc, rc);
5017 	return rv;
5018 }
5019 
5020 static int
5021 hvn_rndis_set(struct hvn_softc *sc, uint32_t oid, void *data, size_t length)
5022 {
5023 	struct rndis_cmd *rc;
5024 	struct rndis_set_req *req;
5025 	struct rndis_set_comp *cmp;
5026 	int rv;
5027 
5028 	rc = hvn_alloc_cmd(sc);
5029 
5030 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5031 	    BUS_DMASYNC_PREREAD);
5032 
5033 	rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
5034 
5035 	req = rc->rc_req;
5036 	req->rm_type = REMOTE_NDIS_SET_MSG;
5037 	req->rm_len = sizeof(*req) + length;
5038 	req->rm_rid = rc->rc_id;
5039 	req->rm_oid = oid;
5040 	req->rm_infobufoffset = sizeof(*req) - RNDIS_HEADER_OFFSET;
5041 
5042 	rc->rc_cmplen = sizeof(*cmp);
5043 
5044 	if (length > 0) {
5045 		KASSERT(sizeof(*req) + length < PAGE_SIZE);
5046 		req->rm_infobuflen = length;
5047 		memcpy(req + 1, data, length);
5048 	}
5049 
5050 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5051 	    BUS_DMASYNC_PREWRITE);
5052 
5053 	if ((rv = hvn_rndis_cmd(sc, rc, 0)) != 0) {
5054 		DPRINTF("%s: SET_MSG failed, error %d\n",
5055 		    device_xname(sc->sc_dev), rv);
5056 		hvn_free_cmd(sc, rc);
5057 		return rv;
5058 	}
5059 
5060 	cmp = (struct rndis_set_comp *)&rc->rc_cmp;
5061 	if (cmp->rm_status != RNDIS_STATUS_SUCCESS)
5062 		rv = EIO;
5063 
5064 	hvn_free_cmd(sc, rc);
5065 	return rv;
5066 }
5067 
5068 static int
5069 hvn_rndis_open(struct hvn_softc *sc)
5070 {
5071 	struct ifnet *ifp = SC2IFP(sc);
5072 	uint32_t filter;
5073 	int rv;
5074 
5075 	if (ifp->if_flags & IFF_PROMISC) {
5076 		filter = RNDIS_PACKET_TYPE_PROMISCUOUS;
5077 	} else {
5078 		filter = RNDIS_PACKET_TYPE_DIRECTED;
5079 		if (ifp->if_flags & IFF_BROADCAST)
5080 			filter |= RNDIS_PACKET_TYPE_BROADCAST;
5081 		if (ifp->if_flags & IFF_ALLMULTI)
5082 			filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
5083 		else {
5084 			struct ethercom *ec = &sc->sc_ec;
5085 			struct ether_multi *enm;
5086 			struct ether_multistep step;
5087 
5088 			ETHER_LOCK(ec);
5089 			ETHER_FIRST_MULTI(step, ec, enm);
5090 			/* TODO: support multicast list */
5091 			if (enm != NULL)
5092 				filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
5093 			ETHER_UNLOCK(ec);
5094 		}
5095 	}
5096 
5097 	rv = hvn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
5098 	    &filter, sizeof(filter));
5099 	if (rv) {
5100 		DPRINTF("%s: failed to set RNDIS filter to %#x\n",
5101 		    device_xname(sc->sc_dev), filter);
5102 	}
5103 	return rv;
5104 }
5105 
5106 static int
5107 hvn_rndis_close(struct hvn_softc *sc)
5108 {
5109 	uint32_t filter = 0;
5110 	int rv;
5111 
5112 	rv = hvn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
5113 	    &filter, sizeof(filter));
5114 	if (rv) {
5115 		DPRINTF("%s: failed to clear RNDIS filter\n",
5116 		    device_xname(sc->sc_dev));
5117 	}
5118 	return rv;
5119 }
5120 
5121 static void
5122 hvn_rndis_detach(struct hvn_softc *sc)
5123 {
5124 	struct rndis_cmd *rc;
5125 	struct rndis_halt_req *req;
5126 	int rv;
5127 
5128 	rc = hvn_alloc_cmd(sc);
5129 
5130 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5131 	    BUS_DMASYNC_PREREAD);
5132 
5133 	rc->rc_id = atomic_inc_uint_nv(&sc->sc_rndisrid);
5134 
5135 	req = rc->rc_req;
5136 	req->rm_type = REMOTE_NDIS_HALT_MSG;
5137 	req->rm_len = sizeof(*req);
5138 	req->rm_rid = rc->rc_id;
5139 
5140 	bus_dmamap_sync(sc->sc_dmat, rc->rc_dmap, 0, PAGE_SIZE,
5141 	    BUS_DMASYNC_PREWRITE);
5142 
5143 	/* No RNDIS completion; rely on NVS message send completion */
5144 	if ((rv = hvn_rndis_cmd(sc, rc, HVN_RNDIS_CMD_NORESP)) != 0) {
5145 		DPRINTF("%s: HALT_MSG failed, error %d\n",
5146 		    device_xname(sc->sc_dev), rv);
5147 	}
5148 	hvn_free_cmd(sc, rc);
5149 }
5150 
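/*
 * Per-device sysctl tree: hw.hvnN.txrx_workqueue plus
 * hw.hvnN.{rx,tx}.{intr_process_limit,process_limit}, mirroring the
 * limits applied by hvn_nvs_intr() and the deferred handlers.
 */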
5151 static void
5152 hvn_init_sysctls(struct hvn_softc *sc)
5153 {
5154 	struct sysctllog **log;
5155 	const struct sysctlnode *rnode, *cnode, *rxnode, *txnode;
5156 	const char *dvname;
5157 	int error;
5158 
5159 	log = &sc->sc_sysctllog;
5160 	dvname = device_xname(sc->sc_dev);
5161 
5162 	error = sysctl_createv(log, 0, NULL, &rnode,
5163 	    0, CTLTYPE_NODE, dvname,
5164 	    SYSCTL_DESCR("hvn information and settings"),
5165 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
5166 	if (error)
5167 		goto err;
5168 
5169 	error = sysctl_createv(log, 0, &rnode, &cnode,
5170 	    CTLFLAG_READWRITE, CTLTYPE_BOOL, "txrx_workqueue",
5171 	    SYSCTL_DESCR("Use workqueue for packet processing"),
5172 	    NULL, 0, &sc->sc_txrx_workqueue, 0, CTL_CREATE, CTL_EOL);
5173 	if (error)
5174 		goto out;
5175 
5176 	error = sysctl_createv(log, 0, &rnode, &rxnode,
5177 	    0, CTLTYPE_NODE, "rx",
5178 	    SYSCTL_DESCR("hvn information and settings for Rx"),
5179 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
5180 	if (error)
5181 		goto out;
5182 
5183 	error = sysctl_createv(log, 0, &rxnode, NULL,
5184 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
5185 	    SYSCTL_DESCR("max number of Rx packets"
5186 	      " to process for interrupt processing"),
5187 	    NULL, 0, &sc->sc_rx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
5188 	if (error)
5189 		goto out;
5190 
5191 	error = sysctl_createv(log, 0, &rxnode, NULL,
5192 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
5193 	    SYSCTL_DESCR("max number of Rx packets"
5194 	      " to process for deferred processing"),
5195 	    NULL, 0, &sc->sc_rx_process_limit, 0, CTL_CREATE, CTL_EOL);
5196 	if (error)
5197 		goto out;
5198 
5199 	error = sysctl_createv(log, 0, &rnode, &txnode,
5200 	    0, CTLTYPE_NODE, "tx",
5201 	    SYSCTL_DESCR("hvn information and settings for Tx"),
5202 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
5203 	if (error)
5204 		goto out;
5205 
5206 	error = sysctl_createv(log, 0, &txnode, NULL,
5207 	    CTLFLAG_READWRITE, CTLTYPE_INT, "intr_process_limit",
5208 	    SYSCTL_DESCR("max number of Tx packets"
5209 	      " to process for interrupt processing"),
5210 	    NULL, 0, &sc->sc_tx_intr_process_limit, 0, CTL_CREATE, CTL_EOL);
5211 	if (error)
5212 		goto out;
5213 
5214 	error = sysctl_createv(log, 0, &txnode, NULL,
5215 	    CTLFLAG_READWRITE, CTLTYPE_INT, "process_limit",
5216 	    SYSCTL_DESCR("max number of Tx packets"
5217 	      " to process for deferred processing"),
5218 	    NULL, 0, &sc->sc_tx_process_limit, 0, CTL_CREATE, CTL_EOL);
5219 	if (error)
5220 		goto out;
5221 
5222 	return;
5223 
5224 out:
5225 	sysctl_teardown(log);
5226 	sc->sc_sysctllog = NULL;
5227 err:
5228 	aprint_error_dev(sc->sc_dev, "sysctl_createv failed (err = %d)\n",
5229 	    error);
5230 }
5231 
5232 SYSCTL_SETUP(sysctl_hw_hvn_setup, "sysctl hw.hvn setup")
5233 {
5234 	const struct sysctlnode *rnode;
5235 	const struct sysctlnode *cnode;
5236 	int error;
5237 
5238 	error = sysctl_createv(clog, 0, NULL, &rnode,
5239 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "hvn",
5240 	    SYSCTL_DESCR("hvn global controls"),
5241 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
5242 	if (error)
5243 		goto fail;
5244 
5245 	error = sysctl_createv(clog, 0, &rnode, &cnode,
5246 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5247 	    "udp_csum_fixup_mtu",
5248 	    SYSCTL_DESCR("UDP checksum offloading fixup MTU"),
5249 	    NULL, 0, &hvn_udpcs_fixup_mtu, sizeof(hvn_udpcs_fixup_mtu),
5250 	    CTL_CREATE, CTL_EOL);
5251 	if (error)
5252 		goto fail;
5253 
5254 	error = sysctl_createv(clog, 0, &rnode, &cnode,
5255 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5256 	    "chimney_size",
5257 	    SYSCTL_DESCR("Chimney send packet size limit"),
5258 	    NULL, 0, &hvn_tx_chimney_size, sizeof(hvn_tx_chimney_size),
5259 	    CTL_CREATE, CTL_EOL);
5260 	if (error)
5261 		goto fail;
5262 
5263 	error = sysctl_createv(clog, 0, &rnode, &cnode,
5264 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5265 	    "channel_count",
5266 	    SYSCTL_DESCR("# of channels to use"),
5267 	    NULL, 0, &hvn_channel_cnt, sizeof(hvn_channel_cnt),
5268 	    CTL_CREATE, CTL_EOL);
5269 	if (error)
5270 		goto fail;
5271 
5272 	error = sysctl_createv(clog, 0, &rnode, &cnode,
5273 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
5274 	    "tx_ring_count",
5275 	    SYSCTL_DESCR("# of transmit rings to use"),
5276 	    NULL, 0, &hvn_tx_ring_cnt, sizeof(hvn_tx_ring_cnt),
5277 	    CTL_CREATE, CTL_EOL);
5278 	if (error)
5279 		goto fail;
5280 
5281 	return;
5282 
5283 fail:
5284 	aprint_error("%s: sysctl_createv failed (err = %d)\n", __func__, error);
5285 }
5286