xref: /dpdk/drivers/net/netvsc/hn_rxtx.c (revision cc9ecbb48ee3a8fb80df6c470141260df3eacec0)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016-2018 Microsoft Corporation
3  * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
4  * All rights reserved.
5  */
6 
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdio.h>
10 #include <errno.h>
11 #include <unistd.h>
12 #include <strings.h>
13 
14 #include <rte_ethdev.h>
15 #include <rte_memcpy.h>
16 #include <rte_string_fns.h>
17 #include <rte_memzone.h>
18 #include <rte_malloc.h>
19 #include <rte_atomic.h>
20 #include <rte_branch_prediction.h>
21 #include <rte_ether.h>
22 #include <rte_common.h>
23 #include <rte_errno.h>
24 #include <rte_memory.h>
25 #include <rte_eal.h>
26 #include <rte_dev.h>
27 #include <rte_net.h>
28 #include <rte_bus_vmbus.h>
29 #include <rte_spinlock.h>
30 
31 #include "hn_logs.h"
32 #include "hn_var.h"
33 #include "hn_rndis.h"
34 #include "hn_nvs.h"
35 #include "ndis.h"
36 
37 #define HN_NVS_SEND_MSG_SIZE \
38 	(sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))
39 
40 #define HN_TXD_CACHE_SIZE	32 /* per-CPU TX descriptor pool cache */
41 #define HN_TXCOPY_THRESHOLD	512
42 
43 #define HN_RXCOPY_THRESHOLD	256
44 #define HN_RXQ_EVENT_DEFAULT	2048
45 
46 struct hn_rxinfo {
47 	uint32_t	vlan_info;
48 	uint32_t	csum_info;
49 	uint32_t	hash_info;
50 	uint32_t	hash_value;
51 };
52 
53 #define HN_RXINFO_VLAN			0x0001
54 #define HN_RXINFO_CSUM			0x0002
55 #define HN_RXINFO_HASHINF		0x0004
56 #define HN_RXINFO_HASHVAL		0x0008
57 #define HN_RXINFO_ALL			\
58 	(HN_RXINFO_VLAN |		\
59 	 HN_RXINFO_CSUM |		\
60 	 HN_RXINFO_HASHINF |		\
61 	 HN_RXINFO_HASHVAL)
62 
63 #define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
64 #define HN_NDIS_RXCSUM_INFO_INVALID	0
65 #define HN_NDIS_HASH_INFO_INVALID	0
66 
67 /*
68  * Per-transmit bookkeeping.
69  * A slot in the transmit ring (chim_index) is reserved for each transmit.
70  *
71  * There are two types of transmit:
72  *   - buffered transmit, where the chimney buffer is used and the RNDIS
73  *     header is placed in that buffer; mbuf == NULL for this case.
74  *
75  *   - direct transmit, where the RNDIS header is in the pre-allocated
76  *     rndis_pkt and the mbuf is freed after transmit.
77  *
78  * Descriptors come from a per-port pool which is used
79  * to limit the number of outstanding requests per device.
80  */
81 struct hn_txdesc {
82 	struct rte_mbuf *m;
83 
84 	uint16_t	queue_id;
85 	uint16_t	chim_index;
86 	uint32_t	chim_size;
87 	uint32_t	data_size;
88 	uint32_t	packets;
89 
90 	struct rndis_packet_msg *rndis_pkt;
91 };
92 
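/*
 * HN_RNDIS_PKT_LEN below is the worst-case RNDIS header size: the base
 * packet message plus the four per-packet info fields that hn_encap()
 * may append (hash value, VLAN, LSO2 and TX checksum).
 */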
93 #define HN_RNDIS_PKT_LEN				\
94 	(sizeof(struct rndis_packet_msg) +		\
95 	 RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) +	\
96 	 RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +	\
97 	 RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +	\
98 	 RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
99 
100 /* Minimum space required for a packet */
101 #define HN_PKTSIZE_MIN(align) \
102 	RTE_ALIGN(ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)
103 
104 #define DEFAULT_TX_FREE_THRESH 32U
105 
106 static void
107 hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
108 {
109 	uint32_t s = m->pkt_len;
110 	const struct ether_addr *ea;
111 
112 	if (s == 64) {
113 		stats->size_bins[1]++;
114 	} else if (s > 64 && s < 1024) {
115 		uint32_t bin;
116 
117 		/* count leading zeros and offset into the correct bin */
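		/* e.g. s = 200: 32 - clz(200) - 5 = 32 - 24 - 5 = 3, the 128-255 byte bin */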
118 		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
119 		stats->size_bins[bin]++;
120 	} else {
121 		if (s < 64)
122 			stats->size_bins[0]++;
123 		else if (s < 1519)
124 			stats->size_bins[6]++;
125 		else if (s >= 1519)
126 			stats->size_bins[7]++;
127 	}
128 
129 	ea = rte_pktmbuf_mtod(m, const struct ether_addr *);
130 	if (is_multicast_ether_addr(ea)) {
131 		if (is_broadcast_ether_addr(ea))
132 			stats->broadcast++;
133 		else
134 			stats->multicast++;
135 	}
136 }
137 
138 static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
139 {
140 	return pkt->pktinfooffset + pkt->pktinfolen;
141 }
142 
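/*
 * RNDIS expresses the data and per-packet-info offsets relative to the
 * dataoffset field of the packet message rather than to the start of the
 * message; this converts an absolute offset into that representation.
 */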
143 static inline uint32_t
144 hn_rndis_pktmsg_offset(uint32_t ofs)
145 {
146 	return ofs - offsetof(struct rndis_packet_msg, dataoffset);
147 }
148 
149 static void hn_txd_init(struct rte_mempool *mp __rte_unused,
150 			void *opaque, void *obj, unsigned int idx)
151 {
152 	struct hn_txdesc *txd = obj;
153 	struct rte_eth_dev *dev = opaque;
154 	struct rndis_packet_msg *pkt;
155 
156 	memset(txd, 0, sizeof(*txd));
157 	txd->chim_index = idx;
158 
159 	pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN,
160 				rte_align32pow2(HN_RNDIS_PKT_LEN),
161 				dev->device->numa_node);
162 	if (!pkt)
163 		rte_exit(EXIT_FAILURE, "can not allocate RNDIS header");
164 
165 	txd->rndis_pkt = pkt;
166 }
167 
168 /*
169  * Unlike Linux and FreeBSD, this driver uses a mempool
170  * to limit the number of outstanding transmits and to reserve buffers.
171  */
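/*
 * Each pool element owns one chimney (send buffer) slot: hn_txd_init()
 * records the element index as chim_index and pre-allocates the RNDIS
 * header used for direct transmits.
 */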
172 int
173 hn_tx_pool_init(struct rte_eth_dev *dev)
174 {
175 	struct hn_data *hv = dev->data->dev_private;
176 	char name[RTE_MEMPOOL_NAMESIZE];
177 	struct rte_mempool *mp;
178 
179 	snprintf(name, sizeof(name),
180 		 "hn_txd_%u", dev->data->port_id);
181 
182 	PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d",
183 		     name, hv->chim_cnt, sizeof(struct hn_txdesc),
184 		     dev->device->numa_node);
185 
186 	mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc),
187 				HN_TXD_CACHE_SIZE, 0,
188 				NULL, NULL,
189 				hn_txd_init, dev,
190 				dev->device->numa_node, 0);
191 	if (!mp) {
192 		PMD_DRV_LOG(ERR,
193 			    "mempool %s create failed: %d", name, rte_errno);
194 		return -rte_errno;
195 	}
196 
197 	hv->tx_pool = mp;
198 	return 0;
199 }
200 
201 static void hn_reset_txagg(struct hn_tx_queue *txq)
202 {
203 	txq->agg_szleft = txq->agg_szmax;
204 	txq->agg_pktleft = txq->agg_pktmax;
205 	txq->agg_txd = NULL;
206 	txq->agg_prevpkt = NULL;
207 }
208 
209 int
210 hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
211 		      uint16_t queue_idx, uint16_t nb_desc __rte_unused,
212 		      unsigned int socket_id,
213 		      const struct rte_eth_txconf *tx_conf)
214 
215 {
216 	struct hn_data *hv = dev->data->dev_private;
217 	struct hn_tx_queue *txq;
218 	uint32_t tx_free_thresh;
219 
220 	PMD_INIT_FUNC_TRACE();
221 
222 	txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE,
223 				 socket_id);
224 	if (!txq)
225 		return -ENOMEM;
226 
227 	txq->hv = hv;
228 	txq->chan = hv->channels[queue_idx];
229 	txq->port_id = dev->data->port_id;
230 	txq->queue_id = queue_idx;
231 
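	/*
	 * Once the number of free TX descriptors falls to free_thresh,
	 * hn_xmit_pkts() polls the channel for send completions before
	 * transmitting more.
	 */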
232 	tx_free_thresh = tx_conf->tx_free_thresh;
233 	if (tx_free_thresh == 0)
234 		tx_free_thresh = RTE_MIN(hv->chim_cnt / 4,
235 					 DEFAULT_TX_FREE_THRESH);
236 
237 	if (tx_free_thresh >= hv->chim_cnt - 3)
238 		tx_free_thresh = hv->chim_cnt - 3;
239 
240 	txq->free_thresh = tx_free_thresh;
241 
242 	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
243 	txq->agg_pktmax = hv->rndis_agg_pkts;
244 	txq->agg_align  = hv->rndis_agg_align;
245 
246 	hn_reset_txagg(txq);
247 
248 	dev->data->tx_queues[queue_idx] = txq;
249 
250 	return 0;
251 }
252 
253 void
254 hn_dev_tx_queue_release(void *arg)
255 {
256 	struct hn_tx_queue *txq = arg;
257 	struct hn_txdesc *txd;
258 
259 	PMD_INIT_FUNC_TRACE();
260 
261 	if (!txq)
262 		return;
263 
264 	/* If any pending data is still present, just drop it */
265 	txd = txq->agg_txd;
266 	if (txd)
267 		rte_mempool_put(txq->hv->tx_pool, txd);
268 
269 	rte_free(txq);
270 }
271 
272 void
273 hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
274 		     struct rte_eth_txq_info *qinfo)
275 {
276 	struct hn_data *hv = dev->data->dev_private;
277 	struct hn_tx_queue *txq = dev->data->tx_queues[queue_idx];
278 
279 	qinfo->conf.tx_free_thresh = txq->free_thresh;
280 	qinfo->nb_desc = hv->tx_pool->size;
281 }
282 
283 static void
284 hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
285 		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
286 {
287 	struct hn_txdesc *txd = (struct hn_txdesc *)xactid;
288 	struct hn_tx_queue *txq;
289 
290 	/* Control packets are sent with xactid == 0 */
291 	if (!txd)
292 		return;
293 
294 	txq = dev->data->tx_queues[queue_id];
295 	if (likely(ack->status == NVS_STATUS_OK)) {
296 		PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u",
297 			   txq->port_id, txq->queue_id, txd->chim_index,
298 			   txd->packets, txd->data_size);
299 		txq->stats.bytes += txd->data_size;
300 		txq->stats.packets += txd->packets;
301 	} else {
302 		PMD_TX_LOG(NOTICE, "port %u:%u complete tx %u failed status %u",
303 			   txq->port_id, txq->queue_id, txd->chim_index, ack->status);
304 		++txq->stats.errors;
305 	}
306 
307 	rte_pktmbuf_free(txd->m);
308 
309 	rte_mempool_put(txq->hv->tx_pool, txd);
310 }
311 
312 /* Handle transmit completion events */
313 static void
314 hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id,
315 		   const struct vmbus_chanpkt_hdr *pkt,
316 		   const void *data)
317 {
318 	const struct hn_nvs_hdr *hdr = data;
319 
320 	switch (hdr->type) {
321 	case NVS_TYPE_RNDIS_ACK:
322 		hn_nvs_send_completed(dev, queue_id, pkt->xactid, data);
323 		break;
324 
325 	default:
326 		PMD_TX_LOG(NOTICE,
327 			   "unexpected send completion type %u",
328 			   hdr->type);
329 	}
330 }
331 
332 /* Parse per-packet info (metadata) */
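/*
 * The info area is a sequence of variable-length rndis_pktinfo records;
 * walk them, collecting VLAN, checksum and hash data, until everything
 * has been found or the area is exhausted.
 */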
333 static int
334 hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen,
335 		struct hn_rxinfo *info)
336 {
337 	const struct rndis_pktinfo *pi = info_data;
338 	uint32_t mask = 0;
339 
340 	while (info_dlen != 0) {
341 		const void *data;
342 		uint32_t dlen;
343 
344 		if (unlikely(info_dlen < sizeof(*pi)))
345 			return -EINVAL;
346 
347 		if (unlikely(info_dlen < pi->size))
348 			return -EINVAL;
349 		info_dlen -= pi->size;
350 
351 		if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
352 			return -EINVAL;
353 		if (unlikely(pi->size < pi->offset))
354 			return -EINVAL;
355 
356 		dlen = pi->size - pi->offset;
357 		data = pi->data;
358 
359 		switch (pi->type) {
360 		case NDIS_PKTINFO_TYPE_VLAN:
361 			if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
362 				return -EINVAL;
363 			info->vlan_info = *((const uint32_t *)data);
364 			mask |= HN_RXINFO_VLAN;
365 			break;
366 
367 		case NDIS_PKTINFO_TYPE_CSUM:
368 			if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
369 				return -EINVAL;
370 			info->csum_info = *((const uint32_t *)data);
371 			mask |= HN_RXINFO_CSUM;
372 			break;
373 
374 		case NDIS_PKTINFO_TYPE_HASHVAL:
375 			if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
376 				return -EINVAL;
377 			info->hash_value = *((const uint32_t *)data);
378 			mask |= HN_RXINFO_HASHVAL;
379 			break;
380 
381 		case NDIS_PKTINFO_TYPE_HASHINF:
382 			if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
383 				return -EINVAL;
384 			info->hash_info = *((const uint32_t *)data);
385 			mask |= HN_RXINFO_HASHINF;
386 			break;
387 
388 		default:
389 			goto next;
390 		}
391 
392 		if (mask == HN_RXINFO_ALL)
393 			break; /* All found; done */
394 next:
395 		pi = (const struct rndis_pktinfo *)
396 		    ((const uint8_t *)pi + pi->size);
397 	}
398 
399 	/*
400 	 * Final fixup.
401 	 * - If there is no hash value, invalidate the hash info.
402 	 */
403 	if (!(mask & HN_RXINFO_HASHVAL))
404 		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
405 	return 0;
406 }
407 
408 /*
409  * Ack the consumed RXBUF associated w/ this channel packet,
410  * so that this RXBUF can be recycled by the hypervisor.
411  */
412 static void hn_rx_buf_release(struct hn_rx_bufinfo *rxb)
413 {
414 	struct rte_mbuf_ext_shared_info *shinfo = &rxb->shinfo;
415 	struct hn_data *hv = rxb->hv;
416 
417 	if (rte_mbuf_ext_refcnt_update(shinfo, -1) == 0) {
418 		hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
419 		--hv->rxbuf_outstanding;
420 	}
421 }
422 
423 static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
424 {
425 	hn_rx_buf_release(opaque);
426 }
427 
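/*
 * The bufinfo starts with one reference held for the RXBUF message itself;
 * each mbuf that attaches the receive area as an external buffer takes an
 * additional reference, and the RXBUF is only ACKed back to the host once
 * the last reference is dropped in hn_rx_buf_release().
 */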
428 static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq,
429 					    const struct vmbus_chanpkt_rxbuf *pkt)
430 {
431 	struct hn_rx_bufinfo *rxb;
432 
433 	rxb = rxq->hv->rxbuf_info + pkt->hdr.xactid;
434 	rxb->chan = rxq->chan;
435 	rxb->xactid = pkt->hdr.xactid;
436 	rxb->hv = rxq->hv;
437 
438 	rxb->shinfo.free_cb = hn_rx_buf_free_cb;
439 	rxb->shinfo.fcb_opaque = rxb;
440 	rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1);
441 	return rxb;
442 }
443 
444 static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
445 		     uint8_t *data, unsigned int headroom, unsigned int dlen,
446 		     const struct hn_rxinfo *info)
447 {
448 	struct hn_data *hv = rxq->hv;
449 	struct rte_mbuf *m;
450 
451 	m = rte_pktmbuf_alloc(rxq->mb_pool);
452 	if (unlikely(!m)) {
453 		struct rte_eth_dev *dev =
454 			&rte_eth_devices[rxq->port_id];
455 
456 		dev->data->rx_mbuf_alloc_failed++;
457 		return;
458 	}
459 
460 	/*
461 	 * For large packets, avoid a copy if possible, but keep some space
462 	 * available in the receive area for later packets.
463 	 */
464 	if (dlen >= HN_RXCOPY_THRESHOLD &&
465 	    hv->rxbuf_outstanding < hv->rxbuf_section_cnt / 2) {
466 		struct rte_mbuf_ext_shared_info *shinfo;
467 		const void *rxbuf;
468 		rte_iova_t iova;
469 
470 		/*
471 		 * Build an external mbuf that points into the receive area.
472 		 * Use the refcount to handle multiple packets in the same
473 		 * receive buffer section.
474 		 */
475 		rxbuf = hv->rxbuf_res->addr;
476 		iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
477 		shinfo = &rxb->shinfo;
478 
479 		if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 1)
480 			++hv->rxbuf_outstanding;
481 
482 		rte_pktmbuf_attach_extbuf(m, data, iova,
483 					  dlen + headroom, shinfo);
484 		m->data_off = headroom;
485 	} else {
486 		/* Mbufs in the pool must be large enough to hold small packets */
487 		if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
488 			rte_pktmbuf_free_seg(m);
489 			++rxq->stats.errors;
490 			return;
491 		}
492 		rte_memcpy(rte_pktmbuf_mtod(m, void *),
493 			   data + headroom, dlen);
494 	}
495 
496 	m->port = rxq->port_id;
497 	m->pkt_len = dlen;
498 	m->data_len = dlen;
499 	m->packet_type = rte_net_get_ptype(m, NULL,
500 					   RTE_PTYPE_L2_MASK |
501 					   RTE_PTYPE_L3_MASK |
502 					   RTE_PTYPE_L4_MASK);
503 
504 	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
505 		m->vlan_tci = info->vlan_info;
506 		m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;
507 	}
508 
509 	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
510 		if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
511 			m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
512 
513 		if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
514 				       | NDIS_RXCSUM_INFO_TCPCS_OK))
515 			m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
516 		else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
517 					    | NDIS_RXCSUM_INFO_UDPCS_FAILED))
518 			m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
519 	}
520 
521 	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
522 		m->ol_flags |= PKT_RX_RSS_HASH;
523 		m->hash.rss = info->hash_value;
524 	}
525 
526 	PMD_RX_LOG(DEBUG,
527 		   "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
528 		   rxq->port_id, rxq->queue_id, rxb->xactid,
529 		   m->pkt_len, m->packet_type, m->ol_flags);
530 
531 	++rxq->stats.packets;
532 	rxq->stats.bytes += m->pkt_len;
533 	hn_update_packet_stats(&rxq->stats, m);
534 
535 	if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
536 		++rxq->ring_full;
537 		rte_pktmbuf_free(m);
538 	}
539 }
540 
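/*
 * Validate one RNDIS_PACKET_MSG (lengths and offsets), parse the optional
 * per-packet info, then hand the Ethernet frame to hn_rxpkt().  Malformed
 * messages only bump the receive error counter.
 */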
541 static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
542 			     struct hn_rx_bufinfo *rxb,
543 			     void *data, uint32_t dlen)
544 {
545 	unsigned int data_off, data_len, pktinfo_off, pktinfo_len;
546 	const struct rndis_packet_msg *pkt = data;
547 	struct hn_rxinfo info = {
548 		.vlan_info = HN_NDIS_VLAN_INFO_INVALID,
549 		.csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
550 		.hash_info = HN_NDIS_HASH_INFO_INVALID,
551 	};
552 	int err;
553 
554 	hn_rndis_dump(pkt);
555 
556 	if (unlikely(dlen < sizeof(*pkt)))
557 		goto error;
558 
559 	if (unlikely(dlen < pkt->len))
560 		goto error; /* truncated RNDIS from host */
561 
562 	if (unlikely(pkt->len < pkt->datalen
563 		     + pkt->oobdatalen + pkt->pktinfolen))
564 		goto error;
565 
566 	if (unlikely(pkt->datalen == 0))
567 		goto error;
568 
569 	/* Check offsets. */
570 	if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
571 		goto error;
572 
573 	if (likely(pkt->pktinfooffset > 0) &&
574 	    unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
575 		     (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
576 		goto error;
577 
578 	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
579 	data_len = pkt->datalen;
580 	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
581 	pktinfo_len = pkt->pktinfolen;
582 
583 	if (likely(pktinfo_len > 0)) {
584 		err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
585 				      pktinfo_len, &info);
586 		if (err)
587 			goto error;
588 	}
589 
590 	if (unlikely(data_off + data_len > pkt->len))
591 		goto error;
592 
593 	if (unlikely(data_len < ETHER_HDR_LEN))
594 		goto error;
595 
596 	hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
597 	return;
598 error:
599 	++rxq->stats.errors;
600 }
601 
602 static void
603 hn_rndis_receive(const struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
604 		 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
605 {
606 	const struct rndis_msghdr *hdr = buf;
607 
608 	switch (hdr->type) {
609 	case RNDIS_PACKET_MSG:
610 		if (dev->data->dev_started)
611 			hn_rndis_rx_data(rxq, rxb, buf, len);
612 		break;
613 
614 	case RNDIS_INDICATE_STATUS_MSG:
615 		hn_rndis_link_status(rxq->hv, buf);
616 		break;
617 
618 	case RNDIS_INITIALIZE_CMPLT:
619 	case RNDIS_QUERY_CMPLT:
620 	case RNDIS_SET_CMPLT:
621 		hn_rndis_receive_response(rxq->hv, buf, len);
622 		break;
623 
624 	default:
625 		PMD_DRV_LOG(NOTICE,
626 			    "unexpected RNDIS message (type %#x len %u)",
627 			    hdr->type, len);
628 		break;
629 	}
630 }
631 
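/*
 * An RXBUF channel packet carries a list of (offset, length) ranges into
 * the shared receive buffer; each range holds one RNDIS message.
 */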
632 static void
633 hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
634 		    struct hn_data *hv,
635 		    struct hn_rx_queue *rxq,
636 		    const struct vmbus_chanpkt_hdr *hdr,
637 		    const void *buf)
638 {
639 	const struct vmbus_chanpkt_rxbuf *pkt;
640 	const struct hn_nvs_hdr *nvs_hdr = buf;
641 	uint32_t rxbuf_sz = hv->rxbuf_res->len;
642 	char *rxbuf = hv->rxbuf_res->addr;
643 	unsigned int i, hlen, count;
644 	struct hn_rx_bufinfo *rxb;
645 
646 	/* At a minimum we need the type header */
647 	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
648 		PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
649 		return;
650 	}
651 
652 	/* Make sure that this is a RNDIS message. */
653 	if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
654 		PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
655 			   nvs_hdr->type);
656 		return;
657 	}
658 
659 	hlen = vmbus_chanpkt_getlen(hdr->hlen);
660 	if (unlikely(hlen < sizeof(*pkt))) {
661 		PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
662 		return;
663 	}
664 
665 	pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
666 	if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
667 		PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
668 			   pkt->rxbuf_id);
669 		return;
670 	}
671 
672 	count = pkt->rxbuf_cnt;
673 	if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
674 				     rxbuf[count]))) {
675 		PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
676 		return;
677 	}
678 
679 	if (pkt->hdr.xactid > hv->rxbuf_section_cnt) {
680 		PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
681 			   pkt->hdr.xactid);
682 		return;
683 	}
684 
685 	/* Set up receive buffer info for the external mbuf free callback */
686 	rxb = hn_rx_buf_init(rxq, pkt);
687 
688 	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
689 	for (i = 0; i < count; ++i) {
690 		unsigned int ofs, len;
691 
692 		ofs = pkt->rxbuf[i].ofs;
693 		len = pkt->rxbuf[i].len;
694 
695 		if (unlikely(ofs + len > rxbuf_sz)) {
696 			PMD_RX_LOG(ERR,
697 				   "%uth RNDIS msg overflow ofs %u, len %u",
698 				   i, ofs, len);
699 			continue;
700 		}
701 
702 		if (unlikely(len == 0)) {
703 			PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
704 			continue;
705 		}
706 
707 		hn_rndis_receive(dev, rxq, rxb,
708 				 rxbuf + ofs, len);
709 	}
710 
711 	/* Drop our reference; this ACKs now if no external mbuf holds the buffer */
712 	hn_rx_buf_release(rxb);
713 }
714 
715 struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
716 				      uint16_t queue_id,
717 				      unsigned int socket_id)
718 {
719 	struct hn_rx_queue *rxq;
720 
721 	rxq = rte_zmalloc_socket("HN_RXQ",
722 				 sizeof(*rxq) + HN_RXQ_EVENT_DEFAULT,
723 				 RTE_CACHE_LINE_SIZE, socket_id);
724 	if (rxq) {
725 		rxq->hv = hv;
726 		rxq->chan = hv->channels[queue_id];
727 		rte_spinlock_init(&rxq->ring_lock);
728 		rxq->port_id = hv->port_id;
729 		rxq->queue_id = queue_id;
730 	}
731 	return rxq;
732 }
733 
734 int
735 hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
736 		      uint16_t queue_idx, uint16_t nb_desc,
737 		      unsigned int socket_id,
738 		      const struct rte_eth_rxconf *rx_conf __rte_unused,
739 		      struct rte_mempool *mp)
740 {
741 	struct hn_data *hv = dev->data->dev_private;
742 	char ring_name[RTE_RING_NAMESIZE];
743 	struct hn_rx_queue *rxq;
744 	unsigned int count;
745 
746 	PMD_INIT_FUNC_TRACE();
747 
748 	if (queue_idx == 0) {
749 		rxq = hv->primary;
750 	} else {
751 		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
752 		if (!rxq)
753 			return -ENOMEM;
754 	}
755 
756 	rxq->mb_pool = mp;
757 	count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
758 	if (nb_desc == 0 || nb_desc > count)
759 		nb_desc = count;
760 
761 	/*
762 	 * Staging ring from the receive event logic to rx_pkts.
763 	 * rx_pkts assumes the caller handles any multi-thread issues;
764 	 * the event logic has its own locking.
765 	 */
766 	snprintf(ring_name, sizeof(ring_name),
767 		 "hn_rx_%u_%u", dev->data->port_id, queue_idx);
768 	rxq->rx_ring = rte_ring_create(ring_name,
769 				       rte_align32pow2(nb_desc),
770 				       socket_id, 0);
771 	if (!rxq->rx_ring)
772 		goto fail;
773 
774 	dev->data->rx_queues[queue_idx] = rxq;
775 	return 0;
776 
777 fail:
778 	rte_ring_free(rxq->rx_ring);
779 	rte_free(rxq->event_buf);
780 	rte_free(rxq);
781 	return -ENOMEM;
782 }
783 
784 void
785 hn_dev_rx_queue_release(void *arg)
786 {
787 	struct hn_rx_queue *rxq = arg;
788 
789 	PMD_INIT_FUNC_TRACE();
790 
791 	if (!rxq)
792 		return;
793 
794 	rte_ring_free(rxq->rx_ring);
795 	rxq->rx_ring = NULL;
796 	rxq->mb_pool = NULL;
797 
798 	if (rxq != rxq->hv->primary) {
799 		rte_free(rxq->event_buf);
800 		rte_free(rxq);
801 	}
802 }
803 
804 void
805 hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
806 		     struct rte_eth_rxq_info *qinfo)
807 {
808 	struct hn_rx_queue *rxq = dev->data->rx_queues[queue_idx];
809 
810 	qinfo->mp = rxq->mb_pool;
811 	qinfo->scattered_rx = 1;
812 	qinfo->nb_desc = rte_ring_get_capacity(rxq->rx_ring);
813 }
814 
815 static void
816 hn_nvs_handle_notify(const struct vmbus_chanpkt_hdr *pkthdr,
817 		     const void *data)
818 {
819 	const struct hn_nvs_hdr *hdr = data;
820 
821 	if (unlikely(vmbus_chanpkt_datalen(pkthdr) < sizeof(*hdr))) {
822 		PMD_DRV_LOG(ERR, "invalid nvs notify");
823 		return;
824 	}
825 
826 	PMD_DRV_LOG(INFO,
827 		    "got notify, nvs type %u", hdr->type);
828 }
829 
830 /*
831  * Process pending events on the channel.
832  * Called from both the Rx queue poll and Tx cleanup paths.
833  */
834 void hn_process_events(struct hn_data *hv, uint16_t queue_id)
835 {
836 	struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
837 	struct hn_rx_queue *rxq;
838 	uint32_t bytes_read = 0;
839 	int ret = 0;
840 
841 	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];
842 
843 	/* If no pending data then nothing to do */
844 	if (rte_vmbus_chan_rx_empty(rxq->chan))
845 		return;
846 
847 	/*
848 	 * The channel is shared between the Rx and Tx queues, so a lock is
849 	 * needed because DPDK does not force Rx and Tx onto the same CPU.
850 	 */
851 	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
852 		return;
853 
854 	for (;;) {
855 		const struct vmbus_chanpkt_hdr *pkt;
856 		uint32_t len = HN_RXQ_EVENT_DEFAULT;
857 		const void *data;
858 
859 		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
860 		if (ret == -EAGAIN)
861 			break;	/* ring is empty */
862 
863 		else if (ret == -ENOBUFS)
864 			rte_exit(EXIT_FAILURE, "event buffer not big enough (%u < %u)",
865 				 HN_RXQ_EVENT_DEFAULT, len);
866 		else if (ret <= 0)
867 			rte_exit(EXIT_FAILURE,
868 				 "vmbus ring buffer error: %d", ret);
869 
870 		bytes_read += ret;
871 		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
872 		data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);
873 
874 		switch (pkt->type) {
875 		case VMBUS_CHANPKT_TYPE_COMP:
876 			hn_nvs_handle_comp(dev, queue_id, pkt, data);
877 			break;
878 
879 		case VMBUS_CHANPKT_TYPE_RXBUF:
880 			hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
881 			break;
882 
883 		case VMBUS_CHANPKT_TYPE_INBAND:
884 			hn_nvs_handle_notify(pkt, data);
885 			break;
886 
887 		default:
888 			PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
889 			break;
890 		}
891 
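		/* Stop draining if the staging ring is full; the rest is picked up on the next poll */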
892 		if (rxq->rx_ring && rte_ring_full(rxq->rx_ring))
893 			break;
894 	}
895 
896 	if (bytes_read > 0)
897 		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);
898 
899 	rte_spinlock_unlock(&rxq->ring_lock);
900 }
901 
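/*
 * Copy the mbuf chain into the chimney buffer immediately after the RNDIS
 * header that hn_encap() built; pkt->dataoffset locates where the frame
 * data starts.
 */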
902 static void hn_append_to_chim(struct hn_tx_queue *txq,
903 			      struct rndis_packet_msg *pkt,
904 			      const struct rte_mbuf *m)
905 {
906 	struct hn_txdesc *txd = txq->agg_txd;
907 	uint8_t *buf = (uint8_t *)pkt;
908 	unsigned int data_offs;
909 
910 	hn_rndis_dump(pkt);
911 
912 	data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
913 	txd->chim_size += pkt->len;
914 	txd->data_size += m->pkt_len;
915 	++txd->packets;
916 	hn_update_packet_stats(&txq->stats, m);
917 
918 	for (; m; m = m->next) {
919 		uint16_t len = rte_pktmbuf_data_len(m);
920 
921 		rte_memcpy(buf + data_offs,
922 			   rte_pktmbuf_mtod(m, const char *), len);
923 		data_offs += len;
924 	}
925 }
926 
927 /*
928  * Send any pending aggregated data in the chimney buffer.
929  * Returns an error if the send was unsuccessful because the
930  * channel ring buffer was full.
931  */
932 static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
933 
934 {
935 	struct hn_txdesc *txd = txq->agg_txd;
936 	struct hn_nvs_rndis rndis;
937 	int ret;
938 
939 	if (!txd)
940 		return 0;
941 
942 	rndis = (struct hn_nvs_rndis) {
943 		.type = NVS_TYPE_RNDIS,
944 		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
945 		.chim_idx = txd->chim_index,
946 		.chim_sz = txd->chim_size,
947 	};
948 
949 	PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u",
950 		   txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size);
951 
952 	ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC,
953 			  &rndis, sizeof(rndis), (uintptr_t)txd, need_sig);
954 
955 	if (likely(ret == 0))
956 		hn_reset_txagg(txq);
957 	else
958 		PMD_TX_LOG(NOTICE, "port %u:%u send failed: %d",
959 			   txq->port_id, txq->queue_id, ret);
960 
961 	return ret;
962 }
963 
964 static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
965 				    struct hn_tx_queue *txq)
966 {
967 	struct hn_txdesc *txd;
968 
969 	if (rte_mempool_get(hv->tx_pool, (void **)&txd)) {
970 		++txq->stats.nomemory;
971 		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
972 		return NULL;
973 	}
974 
975 	txd->m = NULL;
976 	txd->queue_id = txq->queue_id;
977 	txd->packets = 0;
978 	txd->data_size = 0;
979 	txd->chim_size = 0;
980 
981 	return txd;
982 }
983 
984 static void *
985 hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
986 {
987 	struct hn_txdesc *agg_txd = txq->agg_txd;
988 	struct rndis_packet_msg *pkt;
989 	void *chim;
990 
991 	if (agg_txd) {
992 		unsigned int padding, olen;
993 
994 		/*
995 		 * Update the previous RNDIS packet's total length: it can
996 		 * grow because of the mandatory alignment padding required
997 		 * for this RNDIS packet.  Update the aggregating txdesc's
998 		 * chimney sending buffer size accordingly.
999 		 *
1000 		 * Zero out the padding, as required by the
1001 		 * RNDIS specification.
1002 		 */
1003 		pkt = txq->agg_prevpkt;
1004 		olen = pkt->len;
1005 		padding = RTE_ALIGN(olen, txq->agg_align) - olen;
1006 		if (padding > 0) {
1007 			agg_txd->chim_size += padding;
1008 			pkt->len += padding;
1009 			memset((uint8_t *)pkt + olen, 0, padding);
1010 		}
1011 
1012 		chim = (uint8_t *)pkt + pkt->len;
1013 
1014 		txq->agg_pktleft--;
1015 		txq->agg_szleft -= pktsize;
1016 		if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
1017 			/*
1018 			 * Probably can't aggregate more packets,
1019 			 * flush this aggregating txdesc proactively.
1020 			 */
1021 			txq->agg_pktleft = 0;
1022 		}
1023 	} else {
1024 		agg_txd = hn_new_txd(hv, txq);
1025 		if (!agg_txd)
1026 			return NULL;
1027 
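		/*
		 * Start a new aggregation: each TX descriptor owns a fixed
		 * chim_szmax-byte slot in the chimney buffer, addressed by
		 * its chim_index.
		 */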
1028 		chim = (uint8_t *)hv->chim_res->addr
1029 			+ agg_txd->chim_index * hv->chim_szmax;
1030 
1031 		txq->agg_txd = agg_txd;
1032 		txq->agg_pktleft = txq->agg_pktmax - 1;
1033 		txq->agg_szleft = txq->agg_szmax - pktsize;
1034 	}
1035 	txq->agg_prevpkt = chim;
1036 
1037 	return chim;
1038 }
1039 
1040 static inline void *
1041 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt,
1042 			uint32_t pi_dlen, uint32_t pi_type)
1043 {
1044 	const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen);
1045 	struct rndis_pktinfo *pi;
1046 
1047 	/*
1048 	 * Per-packet-info does not move; it only grows.
1049 	 *
1050 	 * NOTE:
1051 	 * pktinfooffset in this phase counts from the beginning
1052 	 * of rndis_packet_msg.
1053 	 */
1054 	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt));
1055 
1056 	pkt->pktinfolen += pi_size;
1057 
1058 	pi->size = pi_size;
1059 	pi->type = pi_type;
1060 	pi->offset = RNDIS_PKTINFO_OFFSET;
1061 
1062 	return pi->data;
1063 }
1064 
1065 /* Put RNDIS header and packet info on packet */
1066 static void hn_encap(struct rndis_packet_msg *pkt,
1067 		     uint16_t queue_id,
1068 		     const struct rte_mbuf *m)
1069 {
1070 	unsigned int hlen = m->l2_len + m->l3_len;
1071 	uint32_t *pi_data;
1072 	uint32_t pkt_hlen;
1073 
1074 	pkt->type = RNDIS_PACKET_MSG;
1075 	pkt->len = m->pkt_len;
1076 	pkt->dataoffset = 0;
1077 	pkt->datalen = m->pkt_len;
1078 	pkt->oobdataoffset = 0;
1079 	pkt->oobdatalen = 0;
1080 	pkt->oobdataelements = 0;
1081 	pkt->pktinfooffset = sizeof(*pkt);
1082 	pkt->pktinfolen = 0;
1083 	pkt->vchandle = 0;
1084 	pkt->reserved = 0;
1085 
1086 	/*
1087 	 * Set the hash value for this packet to the queue_id so that the
1088 	 * TX done event for this packet arrives on the right channel.
1089 	 */
1090 	pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
1091 					  NDIS_PKTINFO_TYPE_HASHVAL);
1092 	*pi_data = queue_id;
1093 
1094 	if (m->ol_flags & PKT_TX_VLAN_PKT) {
1095 		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
1096 						  NDIS_PKTINFO_TYPE_VLAN);
1097 		*pi_data = m->vlan_tci;
1098 	}
1099 
1100 	if (m->ol_flags & PKT_TX_TCP_SEG) {
1101 		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
1102 						  NDIS_PKTINFO_TYPE_LSO);
1103 
1104 		if (m->ol_flags & PKT_TX_IPV6) {
1105 			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
1106 							   m->tso_segsz);
1107 		} else {
1108 			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
1109 							   m->tso_segsz);
1110 		}
1111 	} else if (m->ol_flags &
1112 		   (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) {
1113 		pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
1114 						  NDIS_PKTINFO_TYPE_CSUM);
1115 		*pi_data = 0;
1116 
1117 		if (m->ol_flags & PKT_TX_IPV6)
1118 			*pi_data |= NDIS_TXCSUM_INFO_IPV6;
1119 		if (m->ol_flags & PKT_TX_IPV4) {
1120 			*pi_data |= NDIS_TXCSUM_INFO_IPV4;
1121 
1122 			if (m->ol_flags & PKT_TX_IP_CKSUM)
1123 				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
1124 		}
1125 
1126 		if (m->ol_flags & PKT_TX_TCP_CKSUM)
1127 			*pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
1128 		else if (m->ol_flags & PKT_TX_UDP_CKSUM)
1129 			*pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
1130 	}
1131 
1132 	pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
1133 	/* Fixup RNDIS packet message total length */
1134 	pkt->len += pkt_hlen;
1135 
1136 	/* Convert RNDIS packet message offsets */
1137 	pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
1138 	pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
1139 }
1140 
1141 /* How many scatter-gather list elements are needed */
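/*
 * For example, with 4K pages a 2000-byte segment that starts at page
 * offset 3000 crosses a page boundary and needs
 * (3000 + 2000 + 4095) / 4096 = 2 slots.
 */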
1142 static unsigned int hn_get_slots(const struct rte_mbuf *m)
1143 {
1144 	unsigned int slots = 1; /* for RNDIS header */
1145 
1146 	while (m) {
1147 		unsigned int size = rte_pktmbuf_data_len(m);
1148 		unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK;
1149 
1150 		slots += (offs + size + PAGE_SIZE - 1) / PAGE_SIZE;
1151 		m = m->next;
1152 	}
1153 
1154 	return slots;
1155 }
1156 
1157 /* Build scatter gather list from chained mbuf */
1158 static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
1159 			       const struct rte_mbuf *m)
1160 {
1161 	unsigned int segs = 0;
1162 
1163 	while (m) {
1164 		rte_iova_t addr = rte_mbuf_data_iova(m);
1165 		unsigned int page = addr / PAGE_SIZE;
1166 		unsigned int offset = addr & PAGE_MASK;
1167 		unsigned int len = rte_pktmbuf_data_len(m);
1168 
1169 		while (len > 0) {
1170 			unsigned int bytes = RTE_MIN(len, PAGE_SIZE - offset);
1171 
1172 			sg[segs].page = page;
1173 			sg[segs].ofs = offset;
1174 			sg[segs].len = bytes;
1175 			segs++;
1176 
1177 			++page;
1178 			offset = 0;
1179 			len -= bytes;
1180 		}
1181 		m = m->next;
1182 	}
1183 
1184 	return segs;
1185 }
1186 
1187 /* Transmit directly from mbuf */
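/*
 * The RNDIS header built in txd->rndis_pkt goes into the first GPA element
 * and the mbuf data pages follow.  Any data already aggregated in this
 * descriptor's chimney slot is attached via chim_idx/chim_sz.
 */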
1188 static int hn_xmit_sg(struct hn_tx_queue *txq,
1189 		      const struct hn_txdesc *txd, const struct rte_mbuf *m,
1190 		      bool *need_sig)
1191 {
1192 	struct vmbus_gpa sg[hn_get_slots(m)];
1193 	struct hn_nvs_rndis nvs_rndis = {
1194 		.type = NVS_TYPE_RNDIS,
1195 		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
1196 		.chim_sz = txd->chim_size,
1197 	};
1198 	rte_iova_t addr;
1199 	unsigned int segs;
1200 
1201 	/* attach aggregation data if present */
1202 	if (txd->chim_size > 0)
1203 		nvs_rndis.chim_idx = txd->chim_index;
1204 	else
1205 		nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID;
1206 
1207 	hn_rndis_dump(txd->rndis_pkt);
1208 
1209 	/* pass IOVA of rndis header in first segment */
1210 	addr = rte_malloc_virt2iova(txd->rndis_pkt);
1211 	if (unlikely(addr == RTE_BAD_IOVA)) {
1212 		PMD_DRV_LOG(ERR, "RNDIS transmit can not get iova");
1213 		return -EINVAL;
1214 	}
1215 
1216 	sg[0].page = addr / PAGE_SIZE;
1217 	sg[0].ofs = addr & PAGE_MASK;
1218 	sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
1219 	segs = 1;
1220 
1221 	hn_update_packet_stats(&txq->stats, m);
1222 
1223 	segs += hn_fill_sg(sg + 1, m);
1224 
1225 	PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
1226 		   txq->port_id, txq->queue_id, txd->chim_index,
1227 		   segs, nvs_rndis.chim_sz);
1228 
1229 	return hn_nvs_send_sglist(txq->chan, sg, segs,
1230 				  &nvs_rndis, sizeof(nvs_rndis),
1231 				  (uintptr_t)txd, need_sig);
1232 }
1233 
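/*
 * Transmit burst: packets smaller than HN_TXCOPY_THRESHOLD are copied and
 * aggregated into the chimney (send) buffer, larger packets are sent by
 * scatter-gather directly from the mbuf.
 */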
1234 uint16_t
1235 hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1236 {
1237 	struct hn_tx_queue *txq = ptxq;
1238 	struct hn_data *hv = txq->hv;
1239 	bool need_sig = false;
1240 	uint16_t nb_tx;
1241 	int ret;
1242 
1243 	if (unlikely(hv->closed))
1244 		return 0;
1245 
1246 	if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
1247 		hn_process_events(hv, txq->queue_id);
1248 
1249 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1250 		struct rte_mbuf *m = tx_pkts[nb_tx];
1251 		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
1252 		struct rndis_packet_msg *pkt;
1253 
1254 		/* For small packets aggregate them in chimney buffer */
1255 		/* For small packets, aggregate them in the chimney buffer */
1256 			/* If this packet will not fit, then flush */
1257 			if (txq->agg_pktleft == 0 ||
1258 			    RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
1259 				if (hn_flush_txagg(txq, &need_sig))
1260 					goto fail;
1261 			}
1262 
1263 			pkt = hn_try_txagg(hv, txq, pkt_size);
1264 			if (unlikely(!pkt))
1265 				break;
1266 
1267 			hn_encap(pkt, txq->queue_id, m);
1268 			hn_append_to_chim(txq, pkt, m);
1269 
1270 			rte_pktmbuf_free(m);
1271 
1272 			/* if buffer is full, flush */
1273 			if (txq->agg_pktleft == 0 &&
1274 			    hn_flush_txagg(txq, &need_sig))
1275 				goto fail;
1276 		} else {
1277 			struct hn_txdesc *txd;
1278 
1279 			/* can send chimney data and large packet at once */
1280 			txd = txq->agg_txd;
1281 			if (txd) {
1282 				hn_reset_txagg(txq);
1283 			} else {
1284 				txd = hn_new_txd(hv, txq);
1285 				if (unlikely(!txd))
1286 					break;
1287 			}
1288 
1289 			pkt = txd->rndis_pkt;
1290 			txd->m = m;
1291 			txd->data_size += m->pkt_len;
1292 			++txd->packets;
1293 
1294 			hn_encap(pkt, txq->queue_id, m);
1295 
1296 			ret = hn_xmit_sg(txq, txd, m, &need_sig);
1297 			if (unlikely(ret != 0)) {
1298 				PMD_TX_LOG(NOTICE, "sg send failed: %d", ret);
1299 				++txq->stats.errors;
1300 				rte_mempool_put(hv->tx_pool, txd);
1301 				goto fail;
1302 			}
1303 		}
1304 	}
1305 
1306 	/* If a partial buffer is left, then try to send it.
1307 	 * If that fails, then reuse it on the next send.
1308 	 */
1309 	hn_flush_txagg(txq, &need_sig);
1310 
1311 fail:
1312 	if (need_sig)
1313 		rte_vmbus_chan_signal_tx(txq->chan);
1314 
1315 	return nb_tx;
1316 }
1317 
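/*
 * Receive burst: drain the per-queue staging ring that hn_process_events()
 * fills from the VMBus channel.
 */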
1318 uint16_t
1319 hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1320 {
1321 	struct hn_rx_queue *rxq = prxq;
1322 	struct hn_data *hv = rxq->hv;
1323 
1324 	if (unlikely(hv->closed))
1325 		return 0;
1326 
1327 	/* If the ring has fewer packets than requested, process more events */
1328 	if (rte_ring_count(rxq->rx_ring) < nb_pkts)
1329 		hn_process_events(hv, rxq->queue_id);
1330 
1331 	/* Get mbufs off staging ring */
1332 	return rte_ring_sc_dequeue_burst(rxq->rx_ring, (void **)rx_pkts,
1333 					 nb_pkts, NULL);
1334 }
1335