xref: /dpdk/drivers/net/ntnic/dbsconfig/ntnic_dbsconfig.c (revision e77506397fc8005c5129e22e9e2d15d5876790fd)
1 /*
2  * SPDX-License-Identifier: BSD-3-Clause
3  * Copyright(c) 2023 Napatech A/S
4  */
5 
6 #include <rte_common.h>
7 #include <unistd.h>
8 
9 #include "ntos_drv.h"
10 #include "nt_util.h"
11 #include "ntnic_virt_queue.h"
12 #include "ntnic_mod_reg.h"
13 #include "ntlog.h"
14 
15 #define STRUCT_ALIGNMENT (4 * 1024LU)
16 #define MAX_VIRT_QUEUES 128
17 
18 #define LAST_QUEUE 127
19 #define DISABLE 0
20 #define ENABLE 1
21 #define RX_AM_DISABLE DISABLE
22 #define RX_AM_ENABLE ENABLE
23 #define RX_UW_DISABLE DISABLE
24 #define RX_UW_ENABLE ENABLE
25 #define RX_Q_DISABLE DISABLE
26 #define RX_Q_ENABLE ENABLE
27 #define RX_AM_POLL_SPEED 5
28 #define RX_UW_POLL_SPEED 9
29 #define INIT_QUEUE 1
30 
31 #define TX_AM_DISABLE DISABLE
32 #define TX_AM_ENABLE ENABLE
33 #define TX_UW_DISABLE DISABLE
34 #define TX_UW_ENABLE ENABLE
35 #define TX_Q_DISABLE DISABLE
36 #define TX_Q_ENABLE ENABLE
37 #define TX_AM_POLL_SPEED 5
38 #define TX_UW_POLL_SPEED 8
39 
40 #define VIRTQ_AVAIL_F_NO_INTERRUPT 1
41 
42 #define vq_log_arg(vq, format, ...)	/* queue logging is compiled out; expands to nothing */
43 
44 /*
45  * Packed Ring helper macros
46  */
47 #define PACKED(vq_type) ((vq_type) == PACKED_RING ? 1 : 0)
48 
49 #define avail_flag(vq) ((vq)->avail_wrap_count ? VIRTQ_DESC_F_AVAIL : 0)
50 #define used_flag_inv(vq) ((vq)->avail_wrap_count ? 0 : VIRTQ_DESC_F_USED)
51 
52 #define inc_avail(vq, num)                                                                        \
53 	do {                                                                                      \
54 		struct nthw_virt_queue *temp_vq = (vq);                                           \
55 		temp_vq->next_avail += (num);                                                     \
56 		if (temp_vq->next_avail >= temp_vq->queue_size) {                                 \
57 			temp_vq->next_avail -= temp_vq->queue_size;                               \
58 			temp_vq->avail_wrap_count ^= 1;                                           \
59 		}                                                                                 \
60 	} while (0)
61 
62 #define inc_used(vq, num) do { \
63 	struct nthw_virt_queue *temp_vq = (vq); \
64 	temp_vq->next_used += (num); \
65 	if (temp_vq->next_used >= temp_vq->queue_size) { \
66 		temp_vq->next_used -= temp_vq->queue_size; \
67 		temp_vq->used_wrap_count ^= 1; \
68 	} \
69 } while (0)
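/*
 * Worked example (illustrative only) of how the wrap counters drive the packed-ring
 * AVAIL/USED flag bits. The values assume a hypothetical queue_size of 4; the fields
 * referenced are the packed-ring members of struct nthw_virt_queue defined below.
 *
 *   struct nthw_virt_queue vq = { 0 };
 *   vq.queue_size = 4;
 *   vq.avail_wrap_count = 1;
 *
 *   uint16_t f = avail_flag(&vq) | used_flag_inv(&vq); // == VIRTQ_DESC_F_AVAIL while wrap count is 1
 *   inc_avail(&vq, 4);                                 // next_avail wraps 4 -> 0, wrap count flips to 0
 *   f = avail_flag(&vq) | used_flag_inv(&vq);          // == VIRTQ_DESC_F_USED after the flip
 */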
70 
71 struct __rte_packed_begin virtq_avail {
72 	uint16_t flags;
73 	uint16_t idx;
74 	uint16_t ring[];	/* Queue Size */
75 } __rte_packed_end;
76 
77 struct __rte_packed_begin virtq_used_elem {
78 	/* Index of start of used descriptor chain. */
79 	uint32_t id;
80 	/* Total length of the descriptor chain which was used (written to) */
81 	uint32_t len;
82 } __rte_packed_end;
83 
84 struct __rte_packed_begin virtq_used {
85 	uint16_t flags;
86 	uint16_t idx;
87 	struct virtq_used_elem ring[];	/* Queue Size */
88 } __rte_packed_end;
89 
90 struct virtq_struct_layout_s {
91 	size_t used_offset;
92 	size_t desc_offset;
93 };
94 
95 enum nthw_virt_queue_usage {
96 	NTHW_VIRTQ_UNUSED = 0,
97 	NTHW_VIRTQ_UNMANAGED,
98 	NTHW_VIRTQ_MANAGED
99 };
100 
101 struct nthw_virt_queue {
102 	/* Pointers to virt-queue structs */
103 	union {
104 		struct {
105 			/* SPLIT virtqueue */
106 			struct virtq_avail *p_avail;
107 			struct virtq_used *p_used;
108 			struct virtq_desc *p_desc;
109 			/* Control variables for virt-queue structs */
110 			uint16_t am_idx;
111 			uint16_t used_idx;
112 			uint16_t cached_idx;
113 			uint16_t tx_descr_avail_idx;
114 		};
115 		struct {
116 			/* PACKED virtqueue */
117 			struct pvirtq_event_suppress *driver_event;
118 			struct pvirtq_event_suppress *device_event;
119 			struct pvirtq_desc *desc;
120 			struct {
121 				uint16_t next;
122 				uint16_t num;
123 			} outs;
124 			/*
125 			 * When the FPGA releases used Tx packets in-order, several releases may
126 			 * collapse into one batch. When getting new Tx buffers we may therefore
127 			 * only need part of a batch.
128 			 */
129 			uint16_t next_avail;
130 			uint16_t next_used;
131 			uint16_t avail_wrap_count;
132 			uint16_t used_wrap_count;
133 		};
134 	};
135 
136 	/* Array with packet buffers */
137 	struct nthw_memory_descriptor *p_virtual_addr;
138 
139 	/* Queue configuration info */
140 	nthw_dbs_t *mp_nthw_dbs;
141 
142 	enum nthw_virt_queue_usage usage;
143 	uint16_t irq_vector;
144 	uint16_t vq_type;
145 	uint16_t in_order;
146 
147 	uint16_t queue_size;
148 	uint32_t index;
149 	uint32_t am_enable;
150 	uint32_t host_id;
151 	uint32_t port;	/* Only used by TX queues */
152 	uint32_t virtual_port;	/* Only used by TX queues */
153 	/*
154 	 * Only used by TX queues:
155 	 *   0: VirtIO-Net header (12 bytes).
156 	 *   1: Napatech DVIO0 descriptor (12 bytes).
157 	 */
158 	uint32_t header;
159 	void *avail_struct_phys_addr;
160 	void *used_struct_phys_addr;
161 	void *desc_struct_phys_addr;
162 };
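/*
 * The anonymous union above holds either the split-ring or the packed-ring view of the
 * queue; vq_type selects which members are valid. A minimal sketch of the dispatch a user
 * of this struct is expected to follow (hypothetical helper, not part of the driver):
 *
 *   static inline uint16_t example_ring_position(const struct nthw_virt_queue *vq)
 *   {
 *       if (vq->vq_type == SPLIT_RING)
 *           return vq->am_idx;        // split members (p_avail/p_used/p_desc, am_idx, ...)
 *       if (vq->vq_type == PACKED_RING)
 *           return vq->next_avail;    // packed members (desc, driver/device_event, ...)
 *       return 0;
 *   }
 */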
163 
164 struct pvirtq_struct_layout_s {
165 	size_t driver_event_offset;
166 	size_t device_event_offset;
167 };
168 
169 static struct nthw_virt_queue rxvq[MAX_VIRT_QUEUES];
170 static struct nthw_virt_queue txvq[MAX_VIRT_QUEUES];
171 
172 static void dbs_init_rx_queue(nthw_dbs_t *p_nthw_dbs, uint32_t queue, uint32_t start_idx,
173 	uint32_t start_ptr)
174 {
175 	uint32_t busy;
176 	uint32_t init;
177 	uint32_t dummy;
178 
179 	do {
180 		get_rx_init(p_nthw_dbs, &init, &dummy, &busy);
181 	} while (busy != 0);
182 
183 	set_rx_init(p_nthw_dbs, start_idx, start_ptr, INIT_QUEUE, queue);
184 
185 	do {
186 		get_rx_init(p_nthw_dbs, &init, &dummy, &busy);
187 	} while (busy != 0);
188 }
189 
190 static void dbs_init_tx_queue(nthw_dbs_t *p_nthw_dbs, uint32_t queue, uint32_t start_idx,
191 	uint32_t start_ptr)
192 {
193 	uint32_t busy;
194 	uint32_t init;
195 	uint32_t dummy;
196 
197 	do {
198 		get_tx_init(p_nthw_dbs, &init, &dummy, &busy);
199 	} while (busy != 0);
200 
201 	set_tx_init(p_nthw_dbs, start_idx, start_ptr, INIT_QUEUE, queue);
202 
203 	do {
204 		get_tx_init(p_nthw_dbs, &init, &dummy, &busy);
205 	} while (busy != 0);
206 }
207 
208 static int nthw_virt_queue_init(struct fpga_info_s *p_fpga_info)
209 {
210 	assert(p_fpga_info);
211 
212 	nthw_fpga_t *const p_fpga = p_fpga_info->mp_fpga;
213 	nthw_dbs_t *p_nthw_dbs;
214 	int res = 0;
215 	uint32_t i;
216 
217 	p_fpga_info->mp_nthw_dbs = NULL;
218 
219 	p_nthw_dbs = nthw_dbs_new();
220 
221 	if (p_nthw_dbs == NULL)
222 		return -1;
223 
224 	res = dbs_init(NULL, p_fpga, 0);/* Check that DBS exists in FPGA */
225 
226 	if (res) {
227 		free(p_nthw_dbs);
228 		return res;
229 	}
230 
231 	res = dbs_init(p_nthw_dbs, p_fpga, 0);	/* Create DBS module */
232 
233 	if (res) {
234 		free(p_nthw_dbs);
235 		return res;
236 	}
237 
238 	p_fpga_info->mp_nthw_dbs = p_nthw_dbs;
239 
240 	for (i = 0; i < MAX_VIRT_QUEUES; ++i) {
241 		rxvq[i].usage = NTHW_VIRTQ_UNUSED;
242 		txvq[i].usage = NTHW_VIRTQ_UNUSED;
243 	}
244 
245 	dbs_reset(p_nthw_dbs);
246 
247 	for (i = 0; i < NT_DBS_RX_QUEUES_MAX; ++i)
248 		dbs_init_rx_queue(p_nthw_dbs, i, 0, 0);
249 
250 	for (i = 0; i < NT_DBS_TX_QUEUES_MAX; ++i)
251 		dbs_init_tx_queue(p_nthw_dbs, i, 0, 0);
252 
253 	set_rx_control(p_nthw_dbs, LAST_QUEUE, RX_AM_DISABLE, RX_AM_POLL_SPEED, RX_UW_DISABLE,
254 		RX_UW_POLL_SPEED, RX_Q_DISABLE);
255 	set_rx_control(p_nthw_dbs, LAST_QUEUE, RX_AM_ENABLE, RX_AM_POLL_SPEED, RX_UW_ENABLE,
256 		RX_UW_POLL_SPEED, RX_Q_DISABLE);
257 	set_rx_control(p_nthw_dbs, LAST_QUEUE, RX_AM_ENABLE, RX_AM_POLL_SPEED, RX_UW_ENABLE,
258 		RX_UW_POLL_SPEED, RX_Q_ENABLE);
259 
260 	set_tx_control(p_nthw_dbs, LAST_QUEUE, TX_AM_DISABLE, TX_AM_POLL_SPEED, TX_UW_DISABLE,
261 		TX_UW_POLL_SPEED, TX_Q_DISABLE);
262 	set_tx_control(p_nthw_dbs, LAST_QUEUE, TX_AM_ENABLE, TX_AM_POLL_SPEED, TX_UW_ENABLE,
263 		TX_UW_POLL_SPEED, TX_Q_DISABLE);
264 	set_tx_control(p_nthw_dbs, LAST_QUEUE, TX_AM_ENABLE, TX_AM_POLL_SPEED, TX_UW_ENABLE,
265 		TX_UW_POLL_SPEED, TX_Q_ENABLE);
266 
267 	return 0;
268 }
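/*
 * Bring-up sketch (hedged): this function is not called directly but through the sg_ops
 * table registered at the bottom of this file. Assuming the module-registry accessor is
 * named get_sg_ops() (see ntnic_mod_reg.h; treated as an assumption here), a caller would
 * do roughly:
 *
 *   const struct sg_ops_s *ops = get_sg_ops();
 *   if (ops != NULL && ops->nthw_virt_queue_init(p_fpga_info) != 0)
 *       NT_LOG(ERR, NTNIC, "DBS/virt-queue init failed");
 */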
269 
270 static struct virtq_struct_layout_s dbs_calc_struct_layout(uint32_t queue_size)
271 {
272 	/* The trailing "used_event" (uint16_t) of virtq_avail is not used, so no space is added for it */
273 	size_t avail_mem = sizeof(struct virtq_avail) + queue_size * sizeof(uint16_t);
274 	size_t avail_mem_aligned = ((avail_mem % STRUCT_ALIGNMENT) == 0)
275 		? avail_mem
276 		: STRUCT_ALIGNMENT * (avail_mem / STRUCT_ALIGNMENT + 1);
277 
278 	/* The trailing "avail_event" (uint16_t) of virtq_used is not used, so no space is added for it */
279 	size_t used_mem = sizeof(struct virtq_used) + queue_size * sizeof(struct virtq_used_elem);
280 	size_t used_mem_aligned = ((used_mem % STRUCT_ALIGNMENT) == 0)
281 		? used_mem
282 		: STRUCT_ALIGNMENT * (used_mem / STRUCT_ALIGNMENT + 1);
283 
284 	struct virtq_struct_layout_s virtq_layout;
285 	virtq_layout.used_offset = avail_mem_aligned;
286 	virtq_layout.desc_offset = avail_mem_aligned + used_mem_aligned;
287 
288 	return virtq_layout;
289 }
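/*
 * Worked example of the layout computed above, assuming queue_size = 4096 and the
 * 4 KiB STRUCT_ALIGNMENT (sizes follow from the packed structs above):
 *
 *   avail_mem = 4 + 4096 * 2 = 8196  -> rounded up to 12288 (3 pages)
 *   used_mem  = 4 + 4096 * 8 = 32772 -> rounded up to 36864 (9 pages)
 *
 *   used_offset = 12288
 *   desc_offset = 12288 + 36864 = 49152
 *
 * i.e. the avail ring starts at offset 0 of the virt-queue structure area, the used ring
 * at used_offset and the descriptor table at desc_offset.
 */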
290 
291 static void dbs_initialize_avail_struct(void *addr, uint16_t queue_size,
292 	uint16_t initial_avail_idx)
293 {
294 	uint16_t i;
295 	struct virtq_avail *p_avail = (struct virtq_avail *)addr;
296 
297 	p_avail->flags = VIRTQ_AVAIL_F_NO_INTERRUPT;
298 	p_avail->idx = initial_avail_idx;
299 
300 	for (i = 0; i < queue_size; ++i)
301 		p_avail->ring[i] = i;
302 }
303 
304 static void dbs_initialize_used_struct(void *addr, uint16_t queue_size)
305 {
306 	int i;
307 	struct virtq_used *p_used = (struct virtq_used *)addr;
308 
309 	p_used->flags = 1;	/* = VIRTQ_USED_F_NO_NOTIFY in the virtio used-ring flags field */
310 	p_used->idx = 0;
311 
312 	for (i = 0; i < queue_size; ++i) {
313 		p_used->ring[i].id = 0;
314 		p_used->ring[i].len = 0;
315 	}
316 }
317 
318 static void
319 dbs_initialize_descriptor_struct(void *addr,
320 	struct nthw_memory_descriptor *packet_buffer_descriptors,
321 	uint16_t queue_size, uint16_t flgs)
322 {
323 	if (packet_buffer_descriptors) {
324 		int i;
325 		struct virtq_desc *p_desc = (struct virtq_desc *)addr;
326 
327 		for (i = 0; i < queue_size; ++i) {
328 			p_desc[i].addr = (uint64_t)packet_buffer_descriptors[i].phys_addr;
329 			p_desc[i].len = packet_buffer_descriptors[i].len;
330 			p_desc[i].flags = flgs;
331 			p_desc[i].next = 0;
332 		}
333 	}
334 }
335 
336 static void
337 dbs_initialize_virt_queue_structs(void *avail_struct_addr, void *used_struct_addr,
338 	void *desc_struct_addr,
339 	struct nthw_memory_descriptor *packet_buffer_descriptors,
340 	uint16_t queue_size, uint16_t initial_avail_idx, uint16_t flgs)
341 {
342 	dbs_initialize_avail_struct(avail_struct_addr, queue_size, initial_avail_idx);
343 	dbs_initialize_used_struct(used_struct_addr, queue_size);
344 	dbs_initialize_descriptor_struct(desc_struct_addr, packet_buffer_descriptors, queue_size,
345 		flgs);
346 }
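/*
 * For illustration, with queue_size = 4, initial_avail_idx = 4 and four Rx packet buffers,
 * the three split-ring structures come out of the call above initialized as:
 *
 *   avail: flags = VIRTQ_AVAIL_F_NO_INTERRUPT, idx = 4, ring[] = { 0, 1, 2, 3 }
 *   used:  flags = 1, idx = 0, ring[] all zero
 *   desc:  desc[i] = { .addr = buffer[i] physical address, .len = buffer[i].len,
 *                      .flags = VIRTQ_DESC_F_WRITE, .next = 0 }
 */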
347 
348 static uint16_t dbs_qsize_log2(uint16_t qsize)
349 {
350 	uint32_t qs = 0;
351 
352 	while (qsize) {
353 		qsize = qsize >> 1;
354 		++qs;
355 	}
356 
357 	--qs;
358 	return qs;
359 }
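/*
 * dbs_qsize_log2() returns floor(log2(qsize)) and is meaningful only for the power-of-two
 * queue sizes used by DBS, e.g.:
 *
 *   dbs_qsize_log2(4096) == 12
 *   dbs_qsize_log2(256)  == 8
 *   dbs_qsize_log2(1)    == 0
 */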
360 
361 static struct nthw_virt_queue *nthw_setup_rx_virt_queue(nthw_dbs_t *p_nthw_dbs,
362 	uint32_t index,
363 	uint16_t start_idx,
364 	uint16_t start_ptr,
365 	void *avail_struct_phys_addr,
366 	void *used_struct_phys_addr,
367 	void *desc_struct_phys_addr,
368 	uint16_t queue_size,
369 	uint32_t host_id,
370 	uint32_t header,
371 	uint32_t vq_type,
372 	int irq_vector)
373 {
374 	uint32_t qs = dbs_qsize_log2(queue_size);
375 	uint32_t int_enable;
376 	uint32_t vec;
377 	uint32_t istk;
378 
379 	/*
380 	 * Setup DBS module - DSF00094
381 	 * 3. Configure the DBS.RX_DR_DATA memory; good idea to initialize all
382 	 * DBS_RX_QUEUES entries.
383 	 */
384 	if (set_rx_dr_data(p_nthw_dbs, index, (uint64_t)desc_struct_phys_addr, host_id, qs, header,
385 			0) != 0) {
386 		return NULL;
387 	}
388 
389 	/*
390 	 * 4. Configure the DBS.RX_UW_DATA memory; good idea to initialize all
391 	 *   DBS_RX_QUEUES entries.
392 	 *   Notice: We always start out with interrupts disabled (by setting the
393 	 *     "irq_vector" argument to -1). Queues that require interrupts will have
394 	 *     them enabled at a later time (after we have enabled vfio interrupts in
395 	 *     the kernel).
396 	 */
397 	int_enable = 0;
398 	vec = 0;
399 	istk = 0;
400 	NT_LOG_DBGX(DBG, NTNIC, "set_rx_uw_data int=0 irq_vector=%d", irq_vector);
401 
402 	if (set_rx_uw_data(p_nthw_dbs, index,
403 			(uint64_t)used_struct_phys_addr,
404 			host_id, qs, 0, int_enable, vec, istk) != 0) {
405 		return NULL;
406 	}
407 
408 	/*
409 	 * 2. Configure the DBS.RX_AM_DATA memory and enable the queues you plan to use;
410 	 *  good idea to initialize all DBS_RX_QUEUES entries.
411 	 *  Notice: We do this only for queues that don't require interrupts (i.e. if
412 	 *    irq_vector < 0). Queues that require interrupts will have RX_AM_DATA enabled
413 	 *    at a later time (after we have enabled vfio interrupts in the kernel).
414 	 */
415 	if (irq_vector < 0) {
416 		if (set_rx_am_data(p_nthw_dbs, index, (uint64_t)avail_struct_phys_addr,
417 				RX_AM_DISABLE, host_id, 0,
418 				irq_vector >= 0 ? 1 : 0) != 0) {
419 			return NULL;
420 		}
421 	}
422 
423 	/*
424 	 * 5. Initialize all RX queues (all DBS_RX_QUEUES of them) using the
425 	 *   DBS.RX_INIT register.
426 	 */
427 	dbs_init_rx_queue(p_nthw_dbs, index, start_idx, start_ptr);
428 
429 	/*
430 	 * 2. Configure the DBS.RX_AM_DATA memory and enable the queues you plan to use;
431 	 *  good idea to initialize all DBS_RX_QUEUES entries.
432 	 */
433 	if (set_rx_am_data(p_nthw_dbs, index, (uint64_t)avail_struct_phys_addr, RX_AM_ENABLE,
434 			host_id, 0, irq_vector >= 0 ? 1 : 0) != 0) {
435 		return NULL;
436 	}
437 
438 	/* Save queue state */
439 	rxvq[index].usage = NTHW_VIRTQ_UNMANAGED;
440 	rxvq[index].mp_nthw_dbs = p_nthw_dbs;
441 	rxvq[index].index = index;
442 	rxvq[index].queue_size = queue_size;
443 	rxvq[index].am_enable = (irq_vector < 0) ? RX_AM_ENABLE : RX_AM_DISABLE;
444 	rxvq[index].host_id = host_id;
445 	rxvq[index].avail_struct_phys_addr = avail_struct_phys_addr;
446 	rxvq[index].used_struct_phys_addr = used_struct_phys_addr;
447 	rxvq[index].desc_struct_phys_addr = desc_struct_phys_addr;
448 	rxvq[index].vq_type = vq_type;
449 	rxvq[index].in_order = 0;	/* not used */
450 	rxvq[index].irq_vector = irq_vector;
451 
452 	/* Return queue handle */
453 	return &rxvq[index];
454 }
455 
456 static int dbs_wait_hw_queue_shutdown(struct nthw_virt_queue *vq, int rx);
457 
458 static int dbs_wait_on_busy(struct nthw_virt_queue *vq, uint32_t *idle, int rx)
459 {
460 	uint32_t busy;
461 	uint32_t queue;
462 	int err = 0;
463 	nthw_dbs_t *p_nthw_dbs = vq->mp_nthw_dbs;
464 
465 	do {
466 		if (rx)
467 			err = get_rx_idle(p_nthw_dbs, idle, &queue, &busy);
468 
469 		else
470 			err = get_tx_idle(p_nthw_dbs, idle, &queue, &busy);
471 	} while (!err && busy);
472 
473 	return err;
474 }
475 
476 static int dbs_wait_hw_queue_shutdown(struct nthw_virt_queue *vq, int rx)
477 {
478 	int err = 0;
479 	uint32_t idle = 0;
480 	nthw_dbs_t *p_nthw_dbs = vq->mp_nthw_dbs;
481 
482 	err = dbs_wait_on_busy(vq, &idle, rx);
483 
484 	if (err) {
485 		if (err == -ENOTSUP) {
486 			nt_os_wait_usec(200000);
487 			return 0;
488 		}
489 
490 		return -1;
491 	}
492 
493 	do {
494 		if (rx)
495 			err = set_rx_idle(p_nthw_dbs, 1, vq->index);
496 
497 		else
498 			err = set_tx_idle(p_nthw_dbs, 1, vq->index);
499 
500 		if (err)
501 			return -1;
502 
503 		if (dbs_wait_on_busy(vq, &idle, rx) != 0)
504 			return -1;
505 
506 	} while (idle == 0);
507 
508 	return 0;
509 }
510 
511 static int dbs_internal_release_rx_virt_queue(struct nthw_virt_queue *rxvq)
512 {
513 	if (rxvq == NULL)
514 		return -1;
515 
516 	nthw_dbs_t *p_nthw_dbs = rxvq->mp_nthw_dbs;
517 
518 	/* Clear UW */
519 	rxvq->used_struct_phys_addr = NULL;
520 
521 	if (set_rx_uw_data(p_nthw_dbs, rxvq->index, (uint64_t)rxvq->used_struct_phys_addr,
522 			rxvq->host_id, 0, PACKED(rxvq->vq_type), 0, 0, 0) != 0) {
523 		return -1;
524 	}
525 
526 	/* Disable AM */
527 	rxvq->am_enable = RX_AM_DISABLE;
528 
529 	if (set_rx_am_data(p_nthw_dbs,
530 			rxvq->index,
531 			(uint64_t)rxvq->avail_struct_phys_addr,
532 			rxvq->am_enable,
533 			rxvq->host_id,
534 			PACKED(rxvq->vq_type),
535 			0) != 0) {
536 		return -1;
537 	}
538 
539 	/* Let the FPGA finish packet processing */
540 	if (dbs_wait_hw_queue_shutdown(rxvq, 1) != 0)
541 		return -1;
542 
543 	/* Clear rest of AM */
544 	rxvq->avail_struct_phys_addr = NULL;
545 	rxvq->host_id = 0;
546 
547 	if (set_rx_am_data(p_nthw_dbs,
548 			rxvq->index,
549 			(uint64_t)rxvq->avail_struct_phys_addr,
550 			rxvq->am_enable,
551 			rxvq->host_id,
552 			PACKED(rxvq->vq_type),
553 			0) != 0)
554 		return -1;
555 
556 	/* Clear DR */
557 	rxvq->desc_struct_phys_addr = NULL;
558 
559 	if (set_rx_dr_data(p_nthw_dbs,
560 			rxvq->index,
561 			(uint64_t)rxvq->desc_struct_phys_addr,
562 			rxvq->host_id,
563 			0,
564 			rxvq->header,
565 			PACKED(rxvq->vq_type)) != 0)
566 		return -1;
567 
568 	/* Initialize queue */
569 	dbs_init_rx_queue(p_nthw_dbs, rxvq->index, 0, 0);
570 
571 	/* Reset queue state */
572 	rxvq->usage = NTHW_VIRTQ_UNUSED;
573 	rxvq->mp_nthw_dbs = p_nthw_dbs;
574 	rxvq->index = 0;
575 	rxvq->queue_size = 0;
576 
577 	return 0;
578 }
579 
580 static int nthw_release_mngd_rx_virt_queue(struct nthw_virt_queue *rxvq)
581 {
582 	if (rxvq == NULL || rxvq->usage != NTHW_VIRTQ_MANAGED)
583 		return -1;
584 
585 	if (rxvq->p_virtual_addr) {
586 		free(rxvq->p_virtual_addr);
587 		rxvq->p_virtual_addr = NULL;
588 	}
589 
590 	return dbs_internal_release_rx_virt_queue(rxvq);
591 }
592 
593 static int dbs_internal_release_tx_virt_queue(struct nthw_virt_queue *txvq)
594 {
595 	if (txvq == NULL)
596 		return -1;
597 
598 	nthw_dbs_t *p_nthw_dbs = txvq->mp_nthw_dbs;
599 
600 	/* Clear UW */
601 	txvq->used_struct_phys_addr = NULL;
602 
603 	if (set_tx_uw_data(p_nthw_dbs, txvq->index, (uint64_t)txvq->used_struct_phys_addr,
604 			txvq->host_id, 0, PACKED(txvq->vq_type), 0, 0, 0,
605 			txvq->in_order) != 0) {
606 		return -1;
607 	}
608 
609 	/* Disable AM */
610 	txvq->am_enable = TX_AM_DISABLE;
611 
612 	if (set_tx_am_data(p_nthw_dbs,
613 			txvq->index,
614 			(uint64_t)txvq->avail_struct_phys_addr,
615 			txvq->am_enable,
616 			txvq->host_id,
617 			PACKED(txvq->vq_type),
618 			0) != 0) {
619 		return -1;
620 	}
621 
622 	/* Let the FPGA finish packet processing */
623 	if (dbs_wait_hw_queue_shutdown(txvq, 0) != 0)
624 		return -1;
625 
626 	/* Clear rest of AM */
627 	txvq->avail_struct_phys_addr = NULL;
628 	txvq->host_id = 0;
629 
630 	if (set_tx_am_data(p_nthw_dbs,
631 			txvq->index,
632 			(uint64_t)txvq->avail_struct_phys_addr,
633 			txvq->am_enable,
634 			txvq->host_id,
635 			PACKED(txvq->vq_type),
636 			0) != 0) {
637 		return -1;
638 	}
639 
640 	/* Clear DR */
641 	txvq->desc_struct_phys_addr = NULL;
642 	txvq->port = 0;
643 	txvq->header = 0;
644 
645 	if (set_tx_dr_data(p_nthw_dbs,
646 			txvq->index,
647 			(uint64_t)txvq->desc_struct_phys_addr,
648 			txvq->host_id,
649 			0,
650 			txvq->port,
651 			txvq->header,
652 			PACKED(txvq->vq_type)) != 0) {
653 		return -1;
654 	}
655 
656 	/* Clear QP */
657 	txvq->virtual_port = 0;
658 
659 	if (nthw_dbs_set_tx_qp_data(p_nthw_dbs, txvq->index, txvq->virtual_port) != 0)
660 		return -1;
661 
662 	/* Initialize queue */
663 	dbs_init_tx_queue(p_nthw_dbs, txvq->index, 0, 0);
664 
665 	/* Reset queue state */
666 	txvq->usage = NTHW_VIRTQ_UNUSED;
667 	txvq->mp_nthw_dbs = p_nthw_dbs;
668 	txvq->index = 0;
669 	txvq->queue_size = 0;
670 
671 	return 0;
672 }
673 
674 static int nthw_release_mngd_tx_virt_queue(struct nthw_virt_queue *txvq)
675 {
676 	if (txvq == NULL || txvq->usage != NTHW_VIRTQ_MANAGED)
677 		return -1;
678 
679 	if (txvq->p_virtual_addr) {
680 		free(txvq->p_virtual_addr);
681 		txvq->p_virtual_addr = NULL;
682 	}
683 
684 	return dbs_internal_release_tx_virt_queue(txvq);
685 }
686 
687 static struct nthw_virt_queue *nthw_setup_tx_virt_queue(nthw_dbs_t *p_nthw_dbs,
688 	uint32_t index,
689 	uint16_t start_idx,
690 	uint16_t start_ptr,
691 	void *avail_struct_phys_addr,
692 	void *used_struct_phys_addr,
693 	void *desc_struct_phys_addr,
694 	uint16_t queue_size,
695 	uint32_t host_id,
696 	uint32_t port,
697 	uint32_t virtual_port,
698 	uint32_t header,
699 	uint32_t vq_type,
700 	int irq_vector,
701 	uint32_t in_order)
702 {
703 	uint32_t int_enable;
704 	uint32_t vec;
705 	uint32_t istk;
706 	uint32_t qs = dbs_qsize_log2(queue_size);
707 
708 	/*
709 	 * Setup DBS module - DSF00094
710 	 * 3. Configure the DBS.TX_DR_DATA memory; good idea to initialize all
711 	 *    DBS_TX_QUEUES entries.
712 	 */
713 	if (set_tx_dr_data(p_nthw_dbs, index, (uint64_t)desc_struct_phys_addr, host_id, qs, port,
714 			header, 0) != 0) {
715 		return NULL;
716 	}
717 
718 	/*
719 	 * 4. Configure the DBS.TX_UW_DATA memory; good idea to initialize all
720 	 *    DBS_TX_QUEUES entries.
721 	 *    Notice: We always start out with interrupts disabled (by setting the
722 	 *            "irq_vector" argument to -1). Queues that require interrupts will have
723 	 *             them enabled at a later time (after we have enabled vfio interrupts in the
724 	 *             kernel).
725 	 */
726 	int_enable = 0;
727 	vec = 0;
728 	istk = 0;
729 
730 	if (set_tx_uw_data(p_nthw_dbs, index,
731 			(uint64_t)used_struct_phys_addr,
732 			host_id, qs, 0, int_enable, vec, istk, in_order) != 0) {
733 		return NULL;
734 	}
735 
736 	/*
737 	 * 2. Configure the DBS.TX_AM_DATA memory and enable the queues you plan to use;
738 	 *    good idea to initialize all DBS_TX_QUEUES entries.
739 	 */
740 	if (set_tx_am_data(p_nthw_dbs, index, (uint64_t)avail_struct_phys_addr, TX_AM_DISABLE,
741 			host_id, 0, irq_vector >= 0 ? 1 : 0) != 0) {
742 		return NULL;
743 	}
744 
745 	/*
746 	 * 5. Initialize all TX queues (all DBS_TX_QUEUES of them) using the
747 	 *    DBS.TX_INIT register.
748 	 */
749 	dbs_init_tx_queue(p_nthw_dbs, index, start_idx, start_ptr);
750 
751 	if (nthw_dbs_set_tx_qp_data(p_nthw_dbs, index, virtual_port) != 0)
752 		return NULL;
753 
754 	/*
755 	 * 2. Configure the DBS.TX_AM_DATA memory and enable the queues you plan to use;
756 	 *    good idea to initialize all DBS_TX_QUEUES entries.
757 	 *    Notice: We do this only for queues that don't require interrupts (i.e. if
758 	 *            irq_vector < 0). Queues that require interrupts will have TX_AM_DATA
759 	 *            enabled at a later time (after we have enabled vfio interrupts in the
760 	 *            kernel).
761 	 */
762 	if (irq_vector < 0) {
763 		if (set_tx_am_data(p_nthw_dbs, index, (uint64_t)avail_struct_phys_addr,
764 				TX_AM_ENABLE, host_id, 0,
765 				irq_vector >= 0 ? 1 : 0) != 0) {
766 			return NULL;
767 		}
768 	}
769 
770 	/* Save queue state */
771 	txvq[index].usage = NTHW_VIRTQ_UNMANAGED;
772 	txvq[index].mp_nthw_dbs = p_nthw_dbs;
773 	txvq[index].index = index;
774 	txvq[index].queue_size = queue_size;
775 	txvq[index].am_enable = (irq_vector < 0) ? TX_AM_ENABLE : TX_AM_DISABLE;
776 	txvq[index].host_id = host_id;
777 	txvq[index].port = port;
778 	txvq[index].virtual_port = virtual_port;
779 	txvq[index].avail_struct_phys_addr = avail_struct_phys_addr;
780 	txvq[index].used_struct_phys_addr = used_struct_phys_addr;
781 	txvq[index].desc_struct_phys_addr = desc_struct_phys_addr;
782 	txvq[index].vq_type = vq_type;
783 	txvq[index].in_order = in_order;
784 	txvq[index].irq_vector = irq_vector;
785 
786 	/* Return queue handle */
787 	return &txvq[index];
788 }
789 
790 static struct nthw_virt_queue *
791 nthw_setup_mngd_rx_virt_queue_split(nthw_dbs_t *p_nthw_dbs,
792 	uint32_t index,
793 	uint32_t queue_size,
794 	uint32_t host_id,
795 	uint32_t header,
796 	struct nthw_memory_descriptor *p_virt_struct_area,
797 	struct nthw_memory_descriptor *p_packet_buffers,
798 	int irq_vector)
799 {
800 	struct virtq_struct_layout_s virtq_struct_layout = dbs_calc_struct_layout(queue_size);
801 
802 	dbs_initialize_virt_queue_structs(p_virt_struct_area->virt_addr,
803 		(char *)p_virt_struct_area->virt_addr +
804 		virtq_struct_layout.used_offset,
805 		(char *)p_virt_struct_area->virt_addr +
806 		virtq_struct_layout.desc_offset,
807 		p_packet_buffers,
808 		(uint16_t)queue_size,
809 		p_packet_buffers ? (uint16_t)queue_size : 0,
810 		VIRTQ_DESC_F_WRITE /* Rx */);
811 
812 	rxvq[index].p_avail = p_virt_struct_area->virt_addr;
813 	rxvq[index].p_used =
814 		(void *)((char *)p_virt_struct_area->virt_addr + virtq_struct_layout.used_offset);
815 	rxvq[index].p_desc =
816 		(void *)((char *)p_virt_struct_area->virt_addr + virtq_struct_layout.desc_offset);
817 
818 	rxvq[index].am_idx = p_packet_buffers ? (uint16_t)queue_size : 0;
819 	rxvq[index].used_idx = 0;
820 	rxvq[index].cached_idx = 0;
821 	rxvq[index].p_virtual_addr = NULL;
822 
823 	if (p_packet_buffers) {
824 		rxvq[index].p_virtual_addr = malloc(queue_size * sizeof(*p_packet_buffers));
825 		memcpy(rxvq[index].p_virtual_addr, p_packet_buffers,
826 			queue_size * sizeof(*p_packet_buffers));
827 	}
828 
829 	nthw_setup_rx_virt_queue(p_nthw_dbs, index, 0, 0, (void *)p_virt_struct_area->phys_addr,
830 		(char *)p_virt_struct_area->phys_addr +
831 		virtq_struct_layout.used_offset,
832 		(char *)p_virt_struct_area->phys_addr +
833 		virtq_struct_layout.desc_offset,
834 		(uint16_t)queue_size, host_id, header, SPLIT_RING, irq_vector);
835 
836 	rxvq[index].usage = NTHW_VIRTQ_MANAGED;
837 
838 	return &rxvq[index];
839 }
840 
841 static struct nthw_virt_queue *
842 nthw_setup_mngd_tx_virt_queue_split(nthw_dbs_t *p_nthw_dbs,
843 	uint32_t index,
844 	uint32_t queue_size,
845 	uint32_t host_id,
846 	uint32_t port,
847 	uint32_t virtual_port,
848 	uint32_t header,
849 	int irq_vector,
850 	uint32_t in_order,
851 	struct nthw_memory_descriptor *p_virt_struct_area,
852 	struct nthw_memory_descriptor *p_packet_buffers)
853 {
854 	struct virtq_struct_layout_s virtq_struct_layout = dbs_calc_struct_layout(queue_size);
855 
856 	dbs_initialize_virt_queue_structs(p_virt_struct_area->virt_addr,
857 		(char *)p_virt_struct_area->virt_addr +
858 		virtq_struct_layout.used_offset,
859 		(char *)p_virt_struct_area->virt_addr +
860 		virtq_struct_layout.desc_offset,
861 		p_packet_buffers,
862 		(uint16_t)queue_size,
863 		0,
864 		0 /* Tx */);
865 
866 	txvq[index].p_avail = p_virt_struct_area->virt_addr;
867 	txvq[index].p_used =
868 		(void *)((char *)p_virt_struct_area->virt_addr + virtq_struct_layout.used_offset);
869 	txvq[index].p_desc =
870 		(void *)((char *)p_virt_struct_area->virt_addr + virtq_struct_layout.desc_offset);
871 	txvq[index].queue_size = (uint16_t)queue_size;
872 	txvq[index].am_idx = 0;
873 	txvq[index].used_idx = 0;
874 	txvq[index].cached_idx = 0;
875 	txvq[index].p_virtual_addr = NULL;
876 
877 	txvq[index].tx_descr_avail_idx = 0;
878 
879 	if (p_packet_buffers) {
880 		txvq[index].p_virtual_addr = malloc(queue_size * sizeof(*p_packet_buffers));
881 		memcpy(txvq[index].p_virtual_addr, p_packet_buffers,
882 			queue_size * sizeof(*p_packet_buffers));
883 	}
884 
885 	nthw_setup_tx_virt_queue(p_nthw_dbs, index, 0, 0, (void *)p_virt_struct_area->phys_addr,
886 		(char *)p_virt_struct_area->phys_addr +
887 		virtq_struct_layout.used_offset,
888 		(char *)p_virt_struct_area->phys_addr +
889 		virtq_struct_layout.desc_offset,
890 		(uint16_t)queue_size, host_id, port, virtual_port, header,
891 		SPLIT_RING, irq_vector, in_order);
892 
893 	txvq[index].usage = NTHW_VIRTQ_MANAGED;
894 
895 	return &txvq[index];
896 }
897 
898 /*
899  * Packed Ring
900  */
901 static int nthw_setup_managed_virt_queue_packed(struct nthw_virt_queue *vq,
902 	struct pvirtq_struct_layout_s *pvirtq_layout,
903 	struct nthw_memory_descriptor *p_virt_struct_area,
904 	struct nthw_memory_descriptor *p_packet_buffers,
905 	uint16_t flags,
906 	int rx)
907 {
908 	/* page aligned */
909 	assert(((uintptr_t)p_virt_struct_area->phys_addr & 0xfff) == 0);
910 	assert(p_packet_buffers);
911 
912 	/* clean canvas */
913 	memset(p_virt_struct_area->virt_addr, 0,
914 		sizeof(struct pvirtq_desc) * vq->queue_size +
915 		sizeof(struct pvirtq_event_suppress) * 2 + sizeof(int) * vq->queue_size);
916 
917 	pvirtq_layout->device_event_offset = sizeof(struct pvirtq_desc) * vq->queue_size;
918 	pvirtq_layout->driver_event_offset =
919 		pvirtq_layout->device_event_offset + sizeof(struct pvirtq_event_suppress);
920 
921 	vq->desc = p_virt_struct_area->virt_addr;
922 	vq->device_event = (void *)((uintptr_t)vq->desc + pvirtq_layout->device_event_offset);
923 	vq->driver_event = (void *)((uintptr_t)vq->desc + pvirtq_layout->driver_event_offset);
924 
925 	vq->next_avail = 0;
926 	vq->next_used = 0;
927 	vq->avail_wrap_count = 1;
928 	vq->used_wrap_count = 1;
929 
930 	/*
931 	 * Only possible if FPGA always delivers in-order
932 	 * Buffer ID used is the index in the p_packet_buffers array
933 	 */
934 	unsigned int i;
935 	struct pvirtq_desc *p_desc = vq->desc;
936 
937 	for (i = 0; i < vq->queue_size; i++) {
938 		if (rx) {
939 			p_desc[i].addr = (uint64_t)p_packet_buffers[i].phys_addr;
940 			p_desc[i].len = p_packet_buffers[i].len;
941 		}
942 
943 		p_desc[i].id = i;
944 		p_desc[i].flags = flags;
945 	}
946 
947 	if (rx)
948 		vq->avail_wrap_count ^= 1;	/* filled up available buffers for Rx */
949 	else
950 		vq->used_wrap_count ^= 1;	/* pre-fill free buffer IDs */
951 
952 	if (vq->queue_size == 0)
953 		return -1;	/* don't allocate memory with size of 0 bytes */
954 
955 	vq->p_virtual_addr = malloc(vq->queue_size * sizeof(*p_packet_buffers));
956 
957 	if (vq->p_virtual_addr == NULL)
958 		return -1;
959 
960 	memcpy(vq->p_virtual_addr, p_packet_buffers, vq->queue_size * sizeof(*p_packet_buffers));
961 
962 	/* Not used yet by FPGA - make sure we disable */
963 	vq->device_event->flags = RING_EVENT_FLAGS_DISABLE;
964 
965 	return 0;
966 }
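/*
 * Layout of the packed-ring structure area set up above (offsets for queue_size = 4096,
 * assuming the standard 16-byte pvirtq_desc and 4-byte pvirtq_event_suppress layouts):
 *
 *   offset 0      : struct pvirtq_desc desc[queue_size]   (16 * 4096 = 65536 bytes)
 *   device_event  : struct pvirtq_event_suppress          (offset 65536)
 *   driver_event  : struct pvirtq_event_suppress          (offset 65536 + 4 = 65540)
 */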
967 
968 static struct nthw_virt_queue *
969 nthw_setup_managed_rx_virt_queue_packed(nthw_dbs_t *p_nthw_dbs,
970 	uint32_t index,
971 	uint32_t queue_size,
972 	uint32_t host_id,
973 	uint32_t header,
974 	struct nthw_memory_descriptor *p_virt_struct_area,
975 	struct nthw_memory_descriptor *p_packet_buffers,
976 	int irq_vector)
977 {
978 	struct pvirtq_struct_layout_s pvirtq_layout;
979 	struct nthw_virt_queue *vq = &rxvq[index];
980 	/* Set size and setup packed vq ring */
981 	vq->queue_size = queue_size;
982 
983 	/* Set the Avail flag bit because the avail wrap counter starts at 1; the Used flag is its inverse */
984 	if (nthw_setup_managed_virt_queue_packed(vq, &pvirtq_layout, p_virt_struct_area,
985 			p_packet_buffers,
986 			VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_AVAIL, 1) != 0)
987 		return NULL;
988 
989 	nthw_setup_rx_virt_queue(p_nthw_dbs, index, 0x8000, 0,	/* start wrap ring counter as 1 */
990 		(void *)((uintptr_t)p_virt_struct_area->phys_addr +
991 			pvirtq_layout.driver_event_offset),
992 		(void *)((uintptr_t)p_virt_struct_area->phys_addr +
993 			pvirtq_layout.device_event_offset),
994 		p_virt_struct_area->phys_addr, (uint16_t)queue_size, host_id,
995 		header, PACKED_RING, irq_vector);
996 
997 	vq->usage = NTHW_VIRTQ_MANAGED;
998 	return vq;
999 }
1000 
1001 static struct nthw_virt_queue *
1002 nthw_setup_managed_tx_virt_queue_packed(nthw_dbs_t *p_nthw_dbs,
1003 	uint32_t index,
1004 	uint32_t queue_size,
1005 	uint32_t host_id,
1006 	uint32_t port,
1007 	uint32_t virtual_port,
1008 	uint32_t header,
1009 	int irq_vector,
1010 	uint32_t in_order,
1011 	struct nthw_memory_descriptor *p_virt_struct_area,
1012 	struct nthw_memory_descriptor *p_packet_buffers)
1013 {
1014 	struct pvirtq_struct_layout_s pvirtq_layout;
1015 	struct nthw_virt_queue *vq = &txvq[index];
1016 	/* Set size and setup packed vq ring */
1017 	vq->queue_size = queue_size;
1018 
1019 	if (nthw_setup_managed_virt_queue_packed(vq, &pvirtq_layout, p_virt_struct_area,
1020 			p_packet_buffers, 0, 0) != 0)
1021 		return NULL;
1022 
1023 	nthw_setup_tx_virt_queue(p_nthw_dbs, index, 0x8000, 0,	/* start wrap ring counter as 1 */
1024 		(void *)((uintptr_t)p_virt_struct_area->phys_addr +
1025 			pvirtq_layout.driver_event_offset),
1026 		(void *)((uintptr_t)p_virt_struct_area->phys_addr +
1027 			pvirtq_layout.device_event_offset),
1028 		p_virt_struct_area->phys_addr, (uint16_t)queue_size, host_id,
1029 		port, virtual_port, header, PACKED_RING, irq_vector, in_order);
1030 
1031 	vq->usage = NTHW_VIRTQ_MANAGED;
1032 	return vq;
1033 }
1034 
1035 /*
1036  * Create a Managed Rx Virt Queue
1037  *
1038  * Notice: The queue will be created with interrupts disabled.
1039  *   If interrupts are required, make sure to call nthw_enable_rx_virt_queue()
1040  *   afterwards.
1041  */
1042 static struct nthw_virt_queue *
1043 nthw_setup_mngd_rx_virt_queue(nthw_dbs_t *p_nthw_dbs,
1044 	uint32_t index,
1045 	uint32_t queue_size,
1046 	uint32_t host_id,
1047 	uint32_t header,
1048 	struct nthw_memory_descriptor *p_virt_struct_area,
1049 	struct nthw_memory_descriptor *p_packet_buffers,
1050 	uint32_t vq_type,
1051 	int irq_vector)
1052 {
1053 	switch (vq_type) {
1054 	case SPLIT_RING:
1055 		return nthw_setup_mngd_rx_virt_queue_split(p_nthw_dbs, index, queue_size,
1056 				host_id, header, p_virt_struct_area,
1057 				p_packet_buffers, irq_vector);
1058 
1059 	case PACKED_RING:
1060 		return nthw_setup_managed_rx_virt_queue_packed(p_nthw_dbs, index, queue_size,
1061 				host_id, header, p_virt_struct_area,
1062 				p_packet_buffers, irq_vector);
1063 
1064 	default:
1065 		break;
1066 	}
1067 
1068 	return NULL;
1069 }
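/*
 * Hypothetical caller sketch for the managed Rx setup above. The memory descriptors are
 * assumed to describe DMA-able memory allocated elsewhere by the driver; the names below
 * (ring_area, buffers, QUEUE_SIZE) are illustrative only.
 *
 *   struct nthw_memory_descriptor ring_area;            // virt/phys of the ring structure area
 *   struct nthw_memory_descriptor buffers[QUEUE_SIZE];  // one entry per packet buffer
 *
 *   struct nthw_virt_queue *vq =
 *       nthw_setup_mngd_rx_virt_queue(p_nthw_dbs, 0, QUEUE_SIZE, host_id, 0,
 *                                     &ring_area, buffers, SPLIT_RING, -1);
 *   if (vq == NULL)
 *       NT_LOG(ERR, NTNIC, "Rx virt-queue setup failed");
 */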
1070 
1071 /*
1072  * Create a Managed Tx Virt Queue
1073  *
1074  * Notice: The queue will be created with interrupts disabled.
1075  *   If interrupts are required, make sure to call nthw_enable_tx_virt_queue()
1076  *   afterwards.
1077  */
1078 static struct nthw_virt_queue *
1079 nthw_setup_mngd_tx_virt_queue(nthw_dbs_t *p_nthw_dbs,
1080 	uint32_t index,
1081 	uint32_t queue_size,
1082 	uint32_t host_id,
1083 	uint32_t port,
1084 	uint32_t virtual_port,
1085 	uint32_t header,
1086 	struct nthw_memory_descriptor *p_virt_struct_area,
1087 	struct nthw_memory_descriptor *p_packet_buffers,
1088 	uint32_t vq_type,
1089 	int irq_vector,
1090 	uint32_t in_order)
1091 {
1092 	switch (vq_type) {
1093 	case SPLIT_RING:
1094 		return nthw_setup_mngd_tx_virt_queue_split(p_nthw_dbs, index, queue_size,
1095 				host_id, port, virtual_port, header,
1096 				irq_vector, in_order,
1097 				p_virt_struct_area,
1098 				p_packet_buffers);
1099 
1100 	case PACKED_RING:
1101 		return nthw_setup_managed_tx_virt_queue_packed(p_nthw_dbs, index, queue_size,
1102 				host_id, port, virtual_port, header,
1103 				irq_vector, in_order,
1104 				p_virt_struct_area,
1105 				p_packet_buffers);
1106 
1107 	default:
1108 		break;
1109 	}
1110 
1111 	return NULL;
1112 }
1113 
1114 static uint16_t nthw_get_rx_packets(struct nthw_virt_queue *rxvq,
1115 	uint16_t n,
1116 	struct nthw_received_packets *rp,
1117 	uint16_t *nb_pkts)
1118 {
1119 	uint16_t segs = 0;
1120 	uint16_t pkts = 0;
1121 
1122 	if (rxvq->vq_type == SPLIT_RING) {
1123 		uint16_t i;
1124 		uint16_t entries_ready = (uint16_t)(rxvq->cached_idx - rxvq->used_idx);
1125 
1126 		if (entries_ready < n) {
1127 			/* Look for more packets */
1128 			rxvq->cached_idx = rxvq->p_used->idx;
1129 			entries_ready = (uint16_t)(rxvq->cached_idx - rxvq->used_idx);
1130 
1131 			if (entries_ready == 0) {
1132 				*nb_pkts = 0;
1133 				return 0;
1134 			}
1135 
1136 			if (n > entries_ready)
1137 				n = entries_ready;
1138 		}
1139 
1140 		/*
1141 		 * Hand out packets - make sure only whole packets (all their segments) are given.
1142 		 * The mask below is valid because queue_size is always 2^n.
1143 		 */
1144 		const uint16_t queue_mask = (uint16_t)(rxvq->queue_size - 1);
1145 		const uint32_t buf_len = rxvq->p_desc[0].len;
1146 
1147 		uint16_t used = rxvq->used_idx;
1148 
1149 		for (i = 0; i < n; ++i) {
1150 			uint32_t id = rxvq->p_used->ring[used & queue_mask].id;
1151 			rp[i].addr = rxvq->p_virtual_addr[id].virt_addr;
1152 			rp[i].len = rxvq->p_used->ring[used & queue_mask].len;
1153 
1154 			uint32_t pkt_len = ((struct _pkt_hdr_rx *)rp[i].addr)->cap_len;
1155 
1156 			if (pkt_len > buf_len) {
1157 				/* segmented */
1158 				int nbsegs = (pkt_len + buf_len - 1) / buf_len;
1159 
1160 				if (((int)i + nbsegs) > n) {
1161 					/* don't have enough segments - break out */
1162 					break;
1163 				}
1164 
1165 				int ii;
1166 
1167 				for (ii = 1; ii < nbsegs; ii++) {
1168 					++i;
1169 					id = rxvq->p_used->ring[(used + ii) & queue_mask].id;
1170 					rp[i].addr = rxvq->p_virtual_addr[id].virt_addr;
1171 					rp[i].len =
1172 						rxvq->p_used->ring[(used + ii) & queue_mask].len;
1173 				}
1174 
1175 				used += nbsegs;
1176 
1177 			} else {
1178 				++used;
1179 			}
1180 
1181 			pkts++;
1182 			segs = i + 1;
1183 		}
1184 
1185 		rxvq->used_idx = used;
1186 
1187 	} else if (rxvq->vq_type == PACKED_RING) {
1188 		/* This requires in-order behavior from FPGA */
1189 		int i;
1190 
1191 		for (i = 0; i < n; i++) {
1192 			struct pvirtq_desc *desc = &rxvq->desc[rxvq->next_used];
1193 
1194 			uint16_t flags = desc->flags;
1195 			uint8_t avail = !!(flags & VIRTQ_DESC_F_AVAIL);
1196 			uint8_t used = !!(flags & VIRTQ_DESC_F_USED);
1197 
1198 			if (avail != rxvq->used_wrap_count || used != rxvq->used_wrap_count)
1199 				break;
1200 
1201 			rp[pkts].addr = rxvq->p_virtual_addr[desc->id].virt_addr;
1202 			rp[pkts].len = desc->len;
1203 			pkts++;
1204 
1205 			inc_used(rxvq, 1);
1206 		}
1207 
1208 		segs = pkts;
1209 	}
1210 
1211 	*nb_pkts = pkts;
1212 	return segs;
1213 }
1214 
1215 /*
1216  * Put buffers back into Avail Ring
1217  */
1218 static void nthw_release_rx_packets(struct nthw_virt_queue *rxvq, uint16_t n)
1219 {
1220 	if (rxvq->vq_type == SPLIT_RING) {
1221 		rxvq->am_idx = (uint16_t)(rxvq->am_idx + n);
1222 		rxvq->p_avail->idx = rxvq->am_idx;
1223 
1224 	} else if (rxvq->vq_type == PACKED_RING) {
1225 		int i;
1226 		/*
1227 		 * Defer the flags update of the first segment: it must become visible to the
1228 		 * HW last, so a partially built chain (e.g. with jumbo segments) is never seen
1229 		 */
1230 
1231 		uint16_t first_flags = VIRTQ_DESC_F_WRITE | avail_flag(rxvq) | used_flag_inv(rxvq);
1232 		struct pvirtq_desc *first_desc = &rxvq->desc[rxvq->next_avail];
1233 
1234 		uint32_t len = rxvq->p_virtual_addr[0].len;	/* all same size */
1235 
1236 		/* Optimization point: use in-order release */
1237 
1238 		for (i = 0; i < n; i++) {
1239 			struct pvirtq_desc *desc = &rxvq->desc[rxvq->next_avail];
1240 
1241 			desc->id = rxvq->next_avail;
1242 			desc->addr = (uint64_t)rxvq->p_virtual_addr[desc->id].phys_addr;
1243 			desc->len = len;
1244 
1245 			if (i)
1246 				desc->flags = VIRTQ_DESC_F_WRITE | avail_flag(rxvq) |
1247 					used_flag_inv(rxvq);
1248 
1249 			inc_avail(rxvq, 1);
1250 		}
1251 
1252 		rte_rmb();
1253 		first_desc->flags = first_flags;
1254 	}
1255 }
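/*
 * Rx fast-path sketch (hedged): nthw_get_rx_packets() returns the number of ring entries
 * (segments) consumed and reports the number of complete packets through nb_pkts; the
 * consumed entries are handed back to the FPGA with nthw_release_rx_packets(). The burst
 * size below is illustrative.
 *
 *   struct nthw_received_packets rp[32];
 *   uint16_t nb_pkts;
 *   uint16_t segs = nthw_get_rx_packets(rx_vq, 32, rp, &nb_pkts);
 *
 *   // ... copy the nb_pkts packets (spread over segs buffers) into mbufs ...
 *
 *   if (segs)
 *       nthw_release_rx_packets(rx_vq, segs);
 */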
1256 
1257 static uint16_t nthw_get_tx_packets(struct nthw_virt_queue *txvq,
1258 	uint16_t n,
1259 	uint16_t *first_idx,
1260 	struct nthw_cvirtq_desc *cvq,
1261 	struct nthw_memory_descriptor **p_virt_addr)
1262 {
1263 	int m = 0;
1264 	uint16_t queue_mask =
1265 		(uint16_t)(txvq->queue_size - 1);	/* Valid because queue_size is always 2^n */
1266 	*p_virt_addr = txvq->p_virtual_addr;
1267 
1268 	if (txvq->vq_type == SPLIT_RING) {
1269 		cvq->s = txvq->p_desc;
1270 		cvq->vq_type = SPLIT_RING;
1271 
1272 		*first_idx = txvq->tx_descr_avail_idx;
1273 
1274 		uint16_t entries_used =
1275 			(uint16_t)((txvq->tx_descr_avail_idx - txvq->cached_idx) & queue_mask);
1276 		uint16_t entries_ready = (uint16_t)(txvq->queue_size - 1 - entries_used);
1277 
1278 		vq_log_arg(txvq,
1279 			"ask %i: descrAvail %i, cachedidx %i, used: %i, ready %i used->idx %i",
1280 			n, txvq->tx_descr_avail_idx, txvq->cached_idx, entries_used, entries_ready,
1281 			txvq->p_used->idx);
1282 
1283 		if (entries_ready < n) {
1284 			/*
1285 			 * Look for more packets.
1286 			 * Use the used_idx to index the avail ring: the two rings stay
1287 			 * synchronous because completion is in-order
1288 			 */
1289 			txvq->cached_idx =
1290 				txvq->p_avail->ring[(txvq->p_used->idx - 1) & queue_mask];
1291 
1292 			vq_log_arg(txvq, "Update: get cachedidx %i (used_idx-1 %i)",
1293 				txvq->cached_idx, (txvq->p_used->idx - 1) & queue_mask);
1294 			entries_used =
1295 				(uint16_t)((txvq->tx_descr_avail_idx - txvq->cached_idx)
1296 				& queue_mask);
1297 			entries_ready = (uint16_t)(txvq->queue_size - 1 - entries_used);
1298 			vq_log_arg(txvq, "new used: %i, ready %i", entries_used, entries_ready);
1299 
1300 			if (n > entries_ready)
1301 				n = entries_ready;
1302 		}
1303 
1304 	} else if (txvq->vq_type == PACKED_RING) {
1305 		int i;
1306 
1307 		cvq->p = txvq->desc;
1308 		cvq->vq_type = PACKED_RING;
1309 
1310 		if (txvq->outs.num) {
1311 			*first_idx = txvq->outs.next;
1312 			uint16_t num = min(n, txvq->outs.num);
1313 			txvq->outs.next = (txvq->outs.next + num) & queue_mask;
1314 			txvq->outs.num -= num;
1315 
1316 			if (n == num)
1317 				return n;
1318 
1319 			m = num;
1320 			n -= num;
1321 
1322 		} else {
1323 			*first_idx = txvq->next_used;
1324 		}
1325 
1326 		/* iterate the ring - this requires in-order behavior from FPGA */
1327 		for (i = 0; i < n; i++) {
1328 			struct pvirtq_desc *desc = &txvq->desc[txvq->next_used];
1329 
1330 			uint16_t flags = desc->flags;
1331 			uint8_t avail = !!(flags & VIRTQ_DESC_F_AVAIL);
1332 			uint8_t used = !!(flags & VIRTQ_DESC_F_USED);
1333 
1334 			if (avail != txvq->used_wrap_count || used != txvq->used_wrap_count) {
1335 				n = i;
1336 				break;
1337 			}
1338 
1339 			uint16_t incr = (desc->id - txvq->next_used) & queue_mask;
1340 			i += incr;
1341 			inc_used(txvq, incr + 1);
1342 		}
1343 
1344 		if (i > n) {
1345 			int outs_num = i - n;
1346 			txvq->outs.next = (txvq->next_used - outs_num) & queue_mask;
1347 			txvq->outs.num = outs_num;
1348 		}
1349 
1350 	} else {
1351 		return 0;
1352 	}
1353 
1354 	return m + n;
1355 }
1356 
1357 static void nthw_release_tx_packets(struct nthw_virt_queue *txvq, uint16_t n, uint16_t n_segs[])
1358 {
1359 	int i;
1360 
1361 	if (txvq->vq_type == SPLIT_RING) {
1362 		/* Valid because queue_size is always 2^n */
1363 		uint16_t queue_mask = (uint16_t)(txvq->queue_size - 1);
1364 
1365 		vq_log_arg(txvq, "pkts %i, avail idx %i, start at %i", n, txvq->am_idx,
1366 			txvq->tx_descr_avail_idx);
1367 
1368 		for (i = 0; i < n; i++) {
1369 			int idx = txvq->am_idx & queue_mask;
1370 			txvq->p_avail->ring[idx] = txvq->tx_descr_avail_idx;
1371 			txvq->tx_descr_avail_idx =
1372 				(txvq->tx_descr_avail_idx + n_segs[i]) & queue_mask;
1373 			txvq->am_idx++;
1374 		}
1375 
1376 		/* Make sure the ring has been updated before HW reads index update */
1377 		rte_mb();
1378 		txvq->p_avail->idx = txvq->am_idx;
1379 		vq_log_arg(txvq, "new avail idx %i, descr_idx %i", txvq->p_avail->idx,
1380 			txvq->tx_descr_avail_idx);
1381 
1382 	} else if (txvq->vq_type == PACKED_RING) {
1383 		/*
1384 		 * Defer the flags update of the first segment: it must become visible to the
1385 		 * HW last, so a partially built chain (e.g. with jumbo segments) is never seen
1386 		 */
1387 
1388 		uint16_t first_flags = avail_flag(txvq) | used_flag_inv(txvq);
1389 		struct pvirtq_desc *first_desc = &txvq->desc[txvq->next_avail];
1390 
1391 		for (i = 0; i < n; i++) {
1392 			struct pvirtq_desc *desc = &txvq->desc[txvq->next_avail];
1393 
1394 			desc->id = txvq->next_avail;
1395 			desc->addr = (uint64_t)txvq->p_virtual_addr[desc->id].phys_addr;
1396 
1397 			if (i)
1398 				/* bitwise-or here because next flags may already have been set up
1399 				 */
1400 				desc->flags |= avail_flag(txvq) | used_flag_inv(txvq);
1401 
1402 			inc_avail(txvq, 1);
1403 		}
1404 
1405 		/* Proper read barrier before the FPGA may see the first descriptor's flags */
1406 		rte_rmb();
1407 		first_desc->flags = first_flags;
1408 	}
1409 }
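/*
 * Tx fast-path sketch (hedged): nthw_get_tx_packets() hands out free descriptors starting
 * at first_idx through the combined descriptor view in cvq; after the buffers have been
 * filled, nthw_release_tx_packets() publishes them with one segment count per packet.
 * Names and the single-segment assumption below are illustrative only.
 *
 *   struct nthw_cvirtq_desc cvq;
 *   struct nthw_memory_descriptor *bufs;
 *   uint16_t first_idx;
 *   uint16_t n_segs[4] = { 1, 1, 1, 1 };       // one segment per packet in this sketch
 *
 *   uint16_t got = nthw_get_tx_packets(tx_vq, 4, &first_idx, &cvq, &bufs);
 *
 *   // ... copy packet data into bufs[...] and set the descriptor lengths via cvq ...
 *
 *   if (got)
 *       nthw_release_tx_packets(tx_vq, got, n_segs);
 */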
1410 
1411 static struct sg_ops_s sg_ops = {
1412 	.nthw_setup_rx_virt_queue = nthw_setup_rx_virt_queue,
1413 	.nthw_setup_tx_virt_queue = nthw_setup_tx_virt_queue,
1414 	.nthw_setup_mngd_rx_virt_queue = nthw_setup_mngd_rx_virt_queue,
1415 	.nthw_release_mngd_rx_virt_queue = nthw_release_mngd_rx_virt_queue,
1416 	.nthw_setup_mngd_tx_virt_queue = nthw_setup_mngd_tx_virt_queue,
1417 	.nthw_release_mngd_tx_virt_queue = nthw_release_mngd_tx_virt_queue,
1418 	.nthw_get_rx_packets = nthw_get_rx_packets,
1419 	.nthw_release_rx_packets = nthw_release_rx_packets,
1420 	.nthw_get_tx_packets = nthw_get_tx_packets,
1421 	.nthw_release_tx_packets = nthw_release_tx_packets,
1422 	.nthw_virt_queue_init = nthw_virt_queue_init
1423 };
1424 
1425 void sg_init(void)
1426 {
1427 	NT_LOG(INF, NTNIC, "SG ops initialized");
1428 	register_sg_ops(&sg_ops);
1429 }
1430