1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019 Intel Corporation.
3  */
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <errno.h>
8 
9 #include <rte_common.h>
10 #include <rte_lcore.h>
11 #include <rte_cycles.h>
12 #include <rte_eal.h>
13 #include <rte_log.h>
14 #include <rte_pci.h>
15 #include <rte_mbuf.h>
16 #include <rte_bus_pci.h>
17 #include <rte_memzone.h>
18 #include <rte_memcpy.h>
19 #include <rte_rawdev.h>
20 #include <rte_rawdev_pmd.h>
21 
22 #include "ntb_hw_intel.h"
23 #include "rte_pmd_ntb.h"
24 #include "ntb.h"
25 
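/* Back-to-back (B2B) NTB on Intel Skylake (SKX) and Ice Lake (ICX)
 * platforms is supported.
 */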
26 static const struct rte_pci_id pci_id_ntb_map[] = {
27 	{ RTE_PCI_DEVICE(NTB_INTEL_VENDOR_ID, NTB_INTEL_DEV_ID_B2B_SKX) },
28 	{ RTE_PCI_DEVICE(NTB_INTEL_VENDOR_ID, NTB_INTEL_DEV_ID_B2B_ICX) },
29 	{ .vendor_id = 0, /* sentinel */ },
30 };
31 
32 /* Align with enum ntb_xstats_idx */
33 static struct rte_rawdev_xstats_name ntb_xstats_names[] = {
34 	{"Tx-packets"},
35 	{"Tx-bytes"},
36 	{"Tx-errors"},
37 	{"Rx-packets"},
38 	{"Rx-bytes"},
39 	{"Rx-missed"},
40 };
41 #define NTB_XSTATS_NUM RTE_DIM(ntb_xstats_names)
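/*
 * Layout of hw->ntb_xstats and hw->ntb_xstats_off: the first NTB_XSTATS_NUM
 * entries hold the device totals, followed by one group of NTB_XSTATS_NUM
 * entries per queue pair, so a per-queue counter lives at index
 * NTB_XSTATS_NUM * (qp_id + 1) + stat_id (e.g. Tx-bytes of queue 2 is at
 * 6 * 3 + 1 = 19 with the names above).
 */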
42 
43 static inline void
44 ntb_link_cleanup(struct rte_rawdev *dev)
45 {
46 	struct ntb_hw *hw = dev->dev_private;
47 	int status, i;
48 
49 	if (hw->ntb_ops->spad_write == NULL ||
50 	    hw->ntb_ops->mw_set_trans == NULL) {
51 		NTB_LOG(ERR, "Link cleanup is not supported.");
52 		return;
53 	}
54 
55 	/* Clean spad registers. */
56 	for (i = 0; i < hw->spad_cnt; i++) {
57 		status = (*hw->ntb_ops->spad_write)(dev, i, 0, 0);
58 		if (status)
59 			NTB_LOG(ERR, "Failed to clean local spad.");
60 	}
61 
62 	/* Clear mw so that the peer cannot access local memory. */
63 	for (i = 0; i < hw->used_mw_num; i++) {
64 		status = (*hw->ntb_ops->mw_set_trans)(dev, i, 0, 0);
65 		if (status)
66 			NTB_LOG(ERR, "Failed to clean mw.");
67 	}
68 }
69 
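/*
 * The handshake with the peer is done through the scratchpad registers:
 *   SPAD_NUM_MWS              - number of memory windows on this side
 *   SPAD_MW0_SZ_H/L + 2 * i   - high/low 32 bits of mw i size
 *   SPAD_Q_SZ, SPAD_NUM_QPS   - ring size and number of queue pairs
 *   SPAD_USED_MWS             - number of memzones actually mapped
 *   SPAD_MW0_BA_H/L + 2 * i   - high/low 32 bits of local memzone i base
 * Doorbell 0 is then rung to tell the peer that this side is ready.
 */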
70 static inline int
71 ntb_handshake_work(const struct rte_rawdev *dev)
72 {
73 	struct ntb_hw *hw = dev->dev_private;
74 	uint32_t val;
75 	int ret, i;
76 
77 	if (hw->ntb_ops->spad_write == NULL ||
78 	    hw->ntb_ops->mw_set_trans == NULL) {
79 		NTB_LOG(ERR, "Scratchpad/MW setting is not supported.");
80 		return -ENOTSUP;
81 	}
82 
83 	/* Tell peer the mw info of local side. */
84 	ret = (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_MWS, 1, hw->mw_cnt);
85 	if (ret < 0)
86 		return ret;
87 	for (i = 0; i < hw->mw_cnt; i++) {
88 		NTB_LOG(INFO, "Local %u mw size: 0x%"PRIx64"", i,
89 				hw->mw_size[i]);
90 		val = hw->mw_size[i] >> 32;
91 		ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_H + 2 * i,
92 						 1, val);
93 		if (ret < 0)
94 			return ret;
95 		val = hw->mw_size[i];
96 		ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_L + 2 * i,
97 						 1, val);
98 		if (ret < 0)
99 			return ret;
100 	}
101 
102 	/* Tell peer about the queue info and map memory to the peer. */
103 	ret = (*hw->ntb_ops->spad_write)(dev, SPAD_Q_SZ, 1, hw->queue_size);
104 	if (ret < 0)
105 		return ret;
106 	ret = (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_QPS, 1,
107 					 hw->queue_pairs);
108 	if (ret < 0)
109 		return ret;
110 	ret = (*hw->ntb_ops->spad_write)(dev, SPAD_USED_MWS, 1,
111 					 hw->used_mw_num);
112 	if (ret < 0)
113 		return ret;
114 	for (i = 0; i < hw->used_mw_num; i++) {
115 		val = (uint64_t)(size_t)(hw->mz[i]->addr) >> 32;
116 		ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_BA_H + 2 * i,
117 						 1, val);
118 		if (ret < 0)
119 			return ret;
120 		val = (uint64_t)(size_t)(hw->mz[i]->addr);
121 		ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_BA_L + 2 * i,
122 						 1, val);
123 		if (ret < 0)
124 			return ret;
125 	}
126 
127 	for (i = 0; i < hw->used_mw_num; i++) {
128 		ret = (*hw->ntb_ops->mw_set_trans)(dev, i, hw->mz[i]->iova,
129 						   hw->mz[i]->len);
130 		if (ret < 0)
131 			return ret;
132 	}
133 
134 	/* Ring doorbell 0 to tell peer the device is ready. */
135 	ret = (*hw->ntb_ops->peer_db_set)(dev, 0);
136 	if (ret < 0)
137 		return ret;
138 
139 	return 0;
140 }
141 
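/*
 * Doorbell usage between the two sides:
 *   DB0 - peer is up, triggers the handshake above
 *   DB1 - peer is going down (sent from ntb_dev_stop)
 *   DB2 - acknowledgement that the peer may go down
 * Any other doorbell is simply cleared.
 */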
142 static void
143 ntb_dev_intr_handler(void *param)
144 {
145 	struct rte_rawdev *dev = (struct rte_rawdev *)param;
146 	struct ntb_hw *hw = dev->dev_private;
147 	uint32_t val_h, val_l;
148 	uint64_t peer_mw_size;
149 	uint64_t db_bits = 0;
150 	uint8_t peer_mw_cnt;
151 	int i = 0;
152 
153 	if (hw->ntb_ops->db_read == NULL ||
154 	    hw->ntb_ops->db_clear == NULL ||
155 	    hw->ntb_ops->peer_db_set == NULL) {
156 		NTB_LOG(ERR, "Doorbell is not supported.");
157 		return;
158 	}
159 
160 	db_bits = (*hw->ntb_ops->db_read)(dev);
161 	if (!db_bits)
162 		NTB_LOG(ERR, "No doorbells");
163 
164 	/* Doorbell 0 is for peer device ready. */
165 	if (db_bits & 1) {
166 		NTB_LOG(INFO, "DB0: Peer device is up.");
167 		/* Clear received doorbell. */
168 		(*hw->ntb_ops->db_clear)(dev, 1);
169 
170 		/**
171 		 * Peer dev is already up. All mw settings are already done.
172 		 * Skip them.
173 		 */
174 		if (hw->peer_dev_up)
175 			return;
176 
177 		if (hw->ntb_ops->spad_read == NULL) {
178 			NTB_LOG(ERR, "Scratchpad read is not supported.");
179 			return;
180 		}
181 
182 		/* Check if mw setting on the peer is the same as local. */
183 		peer_mw_cnt = (*hw->ntb_ops->spad_read)(dev, SPAD_NUM_MWS, 0);
184 		if (peer_mw_cnt != hw->mw_cnt) {
185 			NTB_LOG(ERR, "The mw count must be the same on both sides.");
186 			return;
187 		}
188 
189 		for (i = 0; i < hw->mw_cnt; i++) {
190 			val_h = (*hw->ntb_ops->spad_read)
191 				(dev, SPAD_MW0_SZ_H + 2 * i, 0);
192 			val_l = (*hw->ntb_ops->spad_read)
193 				(dev, SPAD_MW0_SZ_L + 2 * i, 0);
194 			peer_mw_size = ((uint64_t)val_h << 32) | val_l;
195 			NTB_LOG(DEBUG, "Peer %u mw size: 0x%"PRIx64"", i,
196 					peer_mw_size);
197 			if (peer_mw_size != hw->mw_size[i]) {
198 				NTB_LOG(ERR, "Mw config must be the same.");
199 				return;
200 			}
201 		}
202 
203 		hw->peer_dev_up = 1;
204 
205 		/**
206 		 * Handshake with the peer. spad_write & mw_set_trans only work
207 		 * when both devices are up, so write the spad again when the db
208 		 * is received, and set the db again for the later device, which
209 		 * may have missed the 1st db.
210 		 */
211 		if (ntb_handshake_work(dev) < 0) {
212 			NTB_LOG(ERR, "Handshake work failed.");
213 			return;
214 		}
215 
216 		/* Get the link info. */
217 		if (hw->ntb_ops->get_link_status == NULL) {
218 			NTB_LOG(ERR, "Getting the link status is not supported.");
219 			return;
220 		}
221 		(*hw->ntb_ops->get_link_status)(dev);
222 		NTB_LOG(INFO, "Link is up. Link speed: %u. Link width: %u",
223 			hw->link_speed, hw->link_width);
224 		return;
225 	}
226 
227 	if (db_bits & (1 << 1)) {
228 		NTB_LOG(INFO, "DB1: Peer device is down.");
229 		/* Clear received doorbell. */
230 		(*hw->ntb_ops->db_clear)(dev, 2);
231 
232 		/* Peer device will be down, so clean up the local side too. */
233 		ntb_link_cleanup(dev);
234 
235 		hw->peer_dev_up = 0;
236 		/* Respond to the peer's dev_stop request. */
237 		(*hw->ntb_ops->peer_db_set)(dev, 2);
238 		return;
239 	}
240 
241 	if (db_bits & (1 << 2)) {
242 		NTB_LOG(INFO, "DB2: Peer device agrees the device can go down.");
243 		/* Clear received doorbell. */
244 		(*hw->ntb_ops->db_clear)(dev, (1 << 2));
245 		hw->peer_dev_up = 0;
246 		return;
247 	}
248 
249 	/* Clear other received doorbells. */
250 	(*hw->ntb_ops->db_clear)(dev, db_bits);
251 }
252 
253 static int
254 ntb_queue_conf_get(struct rte_rawdev *dev,
255 		   uint16_t queue_id,
256 		   rte_rawdev_obj_t queue_conf,
257 		   size_t conf_size)
258 {
259 	struct ntb_queue_conf *q_conf = queue_conf;
260 	struct ntb_hw *hw = dev->dev_private;
261 
262 	if (conf_size != sizeof(*q_conf))
263 		return -EINVAL;
264 
265 	q_conf->tx_free_thresh = hw->tx_queues[queue_id]->tx_free_thresh;
266 	q_conf->nb_desc = hw->rx_queues[queue_id]->nb_rx_desc;
267 	q_conf->rx_mp = hw->rx_queues[queue_id]->mpool;
268 
269 	return 0;
270 }
271 
272 static void
273 ntb_rxq_release_mbufs(struct ntb_rx_queue *q)
274 {
275 	int i;
276 
277 	if (!q || !q->sw_ring) {
278 		NTB_LOG(ERR, "Pointer to rxq or sw_ring is NULL");
279 		return;
280 	}
281 
282 	for (i = 0; i < q->nb_rx_desc; i++) {
283 		if (q->sw_ring[i].mbuf) {
284 			rte_pktmbuf_free_seg(q->sw_ring[i].mbuf);
285 			q->sw_ring[i].mbuf = NULL;
286 		}
287 	}
288 }
289 
290 static void
291 ntb_rxq_release(struct ntb_rx_queue *rxq)
292 {
293 	if (!rxq) {
294 		NTB_LOG(ERR, "Pointer to rxq is NULL");
295 		return;
296 	}
297 
298 	ntb_rxq_release_mbufs(rxq);
299 
300 	rte_free(rxq->sw_ring);
301 	rte_free(rxq);
302 }
303 
304 static int
305 ntb_rxq_setup(struct rte_rawdev *dev,
306 	      uint16_t qp_id,
307 	      rte_rawdev_obj_t queue_conf,
308 	      size_t conf_size)
309 {
310 	struct ntb_queue_conf *rxq_conf = queue_conf;
311 	struct ntb_hw *hw = dev->dev_private;
312 	struct ntb_rx_queue *rxq;
313 
314 	if (conf_size != sizeof(*rxq_conf))
315 		return -EINVAL;
316 
317 	if (rxq_conf->rx_mp == NULL) {
318 		NTB_LOG(ERR, "Invalid null mempool pointer.");
319 		return -EINVAL;
320 	}
321 
322 	/* Allocate the rx queue data structure. */
323 	rxq = rte_zmalloc_socket("ntb rx queue",
324 				 sizeof(struct ntb_rx_queue),
325 				 RTE_CACHE_LINE_SIZE,
326 				 dev->socket_id);
327 	if (!rxq) {
328 		NTB_LOG(ERR, "Failed to allocate memory for "
329 			    "rx queue data structure.");
330 		return -ENOMEM;
331 	}
332 	rxq->nb_rx_desc = rxq_conf->nb_desc;
333 	rxq->mpool = rxq_conf->rx_mp;
334 	rxq->port_id = dev->dev_id;
335 	rxq->queue_id = qp_id;
336 	rxq->hw = hw;
337 
338 	/* Allocate the software ring. */
339 	rxq->sw_ring =
340 		rte_zmalloc_socket("ntb rx sw ring",
341 				   sizeof(struct ntb_rx_entry) *
342 				   rxq->nb_rx_desc,
343 				   RTE_CACHE_LINE_SIZE,
344 				   dev->socket_id);
345 	if (!rxq->sw_ring) {
346 		ntb_rxq_release(rxq);
347 		rxq = NULL;
348 		NTB_LOG(ERR, "Failed to allocate memory for SW ring");
349 		return -ENOMEM;
350 	}
351 
352 	hw->rx_queues[qp_id] = rxq;
353 
354 	return 0;
355 }
356 
357 static void
358 ntb_txq_release_mbufs(struct ntb_tx_queue *q)
359 {
360 	int i;
361 
362 	if (!q || !q->sw_ring) {
363 		NTB_LOG(ERR, "Pointer to txq or sw_ring is NULL");
364 		return;
365 	}
366 
367 	for (i = 0; i < q->nb_tx_desc; i++) {
368 		if (q->sw_ring[i].mbuf) {
369 			rte_pktmbuf_free_seg(q->sw_ring[i].mbuf);
370 			q->sw_ring[i].mbuf = NULL;
371 		}
372 	}
373 }
374 
375 static void
376 ntb_txq_release(struct ntb_tx_queue *txq)
377 {
378 	if (!txq) {
379 		NTB_LOG(ERR, "Pointer to txq is NULL");
380 		return;
381 	}
382 
383 	ntb_txq_release_mbufs(txq);
384 
385 	rte_free(txq->sw_ring);
386 	rte_free(txq);
387 }
388 
389 static int
390 ntb_txq_setup(struct rte_rawdev *dev,
391 	      uint16_t qp_id,
392 	      rte_rawdev_obj_t queue_conf,
393 	      size_t conf_size)
394 {
395 	struct ntb_queue_conf *txq_conf = queue_conf;
396 	struct ntb_hw *hw = dev->dev_private;
397 	struct ntb_tx_queue *txq;
398 	uint16_t i, prev;
399 
400 	if (conf_size != sizeof(*txq_conf))
401 		return -EINVAL;
402 
403 	/* Allocate the TX queue data structure. */
404 	txq = rte_zmalloc_socket("ntb tx queue",
405 				  sizeof(struct ntb_tx_queue),
406 				  RTE_CACHE_LINE_SIZE,
407 				  dev->socket_id);
408 	if (!txq) {
409 		NTB_LOG(ERR, "Failed to allocate memory for "
410 			    "tx queue structure");
411 		return -ENOMEM;
412 	}
413 
414 	txq->nb_tx_desc = txq_conf->nb_desc;
415 	txq->port_id = dev->dev_id;
416 	txq->queue_id = qp_id;
417 	txq->hw = hw;
418 
419 	/* Allocate software ring */
420 	txq->sw_ring =
421 		rte_zmalloc_socket("ntb tx sw ring",
422 				   sizeof(struct ntb_tx_entry) *
423 				   txq->nb_tx_desc,
424 				   RTE_CACHE_LINE_SIZE,
425 				   dev->socket_id);
426 	if (!txq->sw_ring) {
427 		ntb_txq_release(txq);
428 		txq = NULL;
429 		NTB_LOG(ERR, "Failed to allocate memory for SW TX ring");
430 		return -ENOMEM;
431 	}
432 
433 	prev = txq->nb_tx_desc - 1;
434 	for (i = 0; i < txq->nb_tx_desc; i++) {
435 		txq->sw_ring[i].mbuf = NULL;
436 		txq->sw_ring[i].last_id = i;
437 		txq->sw_ring[prev].next_id = i;
438 		prev = i;
439 	}
440 
441 	txq->tx_free_thresh = txq_conf->tx_free_thresh ?
442 			      txq_conf->tx_free_thresh :
443 			      NTB_DFLT_TX_FREE_THRESH;
444 	if (txq->tx_free_thresh >= txq->nb_tx_desc - 3) {
445 		NTB_LOG(ERR, "tx_free_thresh must be less than nb_desc - 3. "
446 			"(tx_free_thresh=%u qp_id=%u)", txq->tx_free_thresh, qp_id);
447 		ntb_txq_release(txq);
448 		return -EINVAL;
449 	}
450 
451 	hw->tx_queues[qp_id] = txq;
452 
453 	return 0;
454 }
455 
456 
457 static int
458 ntb_queue_setup(struct rte_rawdev *dev,
459 		uint16_t queue_id,
460 		rte_rawdev_obj_t queue_conf,
461 		size_t conf_size)
462 {
463 	struct ntb_hw *hw = dev->dev_private;
464 	int ret;
465 
466 	if (queue_id >= hw->queue_pairs)
467 		return -EINVAL;
468 
469 	ret = ntb_txq_setup(dev, queue_id, queue_conf, conf_size);
470 	if (ret < 0)
471 		return ret;
472 
473 	ret = ntb_rxq_setup(dev, queue_id, queue_conf, conf_size);
474 
475 	return ret;
476 }
477 
478 static int
479 ntb_queue_release(struct rte_rawdev *dev, uint16_t queue_id)
480 {
481 	struct ntb_hw *hw = dev->dev_private;
482 
483 	if (queue_id >= hw->queue_pairs)
484 		return -EINVAL;
485 
486 	ntb_txq_release(hw->tx_queues[queue_id]);
487 	hw->tx_queues[queue_id] = NULL;
488 	ntb_rxq_release(hw->rx_queues[queue_id]);
489 	hw->rx_queues[queue_id] = NULL;
490 
491 	return 0;
492 }
493 
494 static uint16_t
495 ntb_queue_count(struct rte_rawdev *dev)
496 {
497 	struct ntb_hw *hw = dev->dev_private;
498 	return hw->queue_pairs;
499 }
500 
501 static int
502 ntb_queue_init(struct rte_rawdev *dev, uint16_t qp_id)
503 {
504 	struct ntb_hw *hw = dev->dev_private;
505 	struct ntb_rx_queue *rxq = hw->rx_queues[qp_id];
506 	struct ntb_tx_queue *txq = hw->tx_queues[qp_id];
507 	volatile struct ntb_header *local_hdr;
508 	struct ntb_header *remote_hdr;
509 	uint16_t q_size = hw->queue_size;
510 	uint32_t hdr_offset;
511 	void *bar_addr;
512 	uint16_t i;
513 
514 	if (hw->ntb_ops->get_peer_mw_addr == NULL) {
515 		NTB_LOG(ERR, "Getting peer mw addr is not supported.");
516 		return -EINVAL;
517 	}
518 
519 	/* Put queue info into the start of shared memory. */
520 	hdr_offset = hw->hdr_size_per_queue * qp_id;
521 	local_hdr = (volatile struct ntb_header *)
522 		    ((size_t)hw->mz[0]->addr + hdr_offset);
523 	bar_addr = (*hw->ntb_ops->get_peer_mw_addr)(dev, 0);
524 	if (bar_addr == NULL)
525 		return -EINVAL;
526 	remote_hdr = (struct ntb_header *)
527 		     ((size_t)bar_addr + hdr_offset);
528 
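	/*
	 * Each side writes into the peer's header (through the BAR) and reads
	 * from its own copy: the RX path publishes free buffer descriptors and
	 * avail_cnt remotely and consumes the used ring and used_cnt locally,
	 * while the TX path does the opposite.
	 */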
529 	/* rxq init. */
530 	rxq->rx_desc_ring = (struct ntb_desc *)
531 			    (&remote_hdr->desc_ring);
532 	rxq->rx_used_ring = (volatile struct ntb_used *)
533 			    (&local_hdr->desc_ring[q_size]);
534 	rxq->avail_cnt = &remote_hdr->avail_cnt;
535 	rxq->used_cnt = &local_hdr->used_cnt;
536 
537 	for (i = 0; i < rxq->nb_rx_desc - 1; i++) {
538 		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mpool);
539 		if (unlikely(!mbuf)) {
540 			NTB_LOG(ERR, "Failed to allocate mbuf for RX");
541 			return -ENOMEM;
542 		}
543 		mbuf->port = dev->dev_id;
544 
545 		rxq->sw_ring[i].mbuf = mbuf;
546 
547 		rxq->rx_desc_ring[i].addr = rte_pktmbuf_mtod(mbuf, size_t);
548 		rxq->rx_desc_ring[i].len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;
549 	}
550 	rte_wmb();
551 	*rxq->avail_cnt = rxq->nb_rx_desc - 1;
552 	rxq->last_avail = rxq->nb_rx_desc - 1;
553 	rxq->last_used = 0;
554 
555 	/* txq init */
556 	txq->tx_desc_ring = (volatile struct ntb_desc *)
557 			    (&local_hdr->desc_ring);
558 	txq->tx_used_ring = (struct ntb_used *)
559 			    (&remote_hdr->desc_ring[q_size]);
560 	txq->avail_cnt = &local_hdr->avail_cnt;
561 	txq->used_cnt = &remote_hdr->used_cnt;
562 
563 	rte_wmb();
564 	*txq->used_cnt = 0;
565 	txq->last_used = 0;
566 	txq->last_avail = 0;
567 	txq->nb_tx_free = txq->nb_tx_desc - 1;
568 
569 	/* Set per queue stats. */
570 	for (i = 0; i < NTB_XSTATS_NUM; i++) {
571 		hw->ntb_xstats[i + NTB_XSTATS_NUM * (qp_id + 1)] = 0;
572 		hw->ntb_xstats_off[i + NTB_XSTATS_NUM * (qp_id + 1)] = 0;
573 	}
574 
575 	return 0;
576 }
577 
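/*
 * Ring indexes below wrap with "& (nb_desc - 1)", which relies on the
 * descriptor count being a power of two.
 */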
578 static inline void
579 ntb_enqueue_cleanup(struct ntb_tx_queue *txq)
580 {
581 	struct ntb_tx_entry *sw_ring = txq->sw_ring;
582 	uint16_t tx_free = txq->last_avail;
583 	uint16_t nb_to_clean, i;
584 
585 	/* avail_cnt + 1 is the next index the peer will receive into. */
586 	nb_to_clean = (*txq->avail_cnt - txq->last_avail + 1 +
587 			txq->nb_tx_desc) & (txq->nb_tx_desc - 1);
588 	nb_to_clean = RTE_MIN(nb_to_clean, txq->tx_free_thresh);
589 	for (i = 0; i < nb_to_clean; i++) {
590 		if (sw_ring[tx_free].mbuf)
591 			rte_pktmbuf_free_seg(sw_ring[tx_free].mbuf);
592 		tx_free = (tx_free + 1) & (txq->nb_tx_desc - 1);
593 	}
594 
595 	txq->nb_tx_free += nb_to_clean;
596 	txq->last_avail = tx_free;
597 }
598 
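/*
 * Enqueue path: for each segment, take a descriptor published by the peer,
 * translate its buffer address to the local BAR mapping with ioremap() and
 * copy the mbuf data through the NTB window. The used entries are then
 * written back to the peer's used ring and used_cnt is updated so the peer
 * can receive the packets.
 */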
599 static int
600 ntb_enqueue_bufs(struct rte_rawdev *dev,
601 		 struct rte_rawdev_buf **buffers,
602 		 unsigned int count,
603 		 rte_rawdev_obj_t context)
604 {
605 	struct ntb_hw *hw = dev->dev_private;
606 	struct ntb_tx_queue *txq = hw->tx_queues[(size_t)context];
607 	struct ntb_tx_entry *sw_ring = txq->sw_ring;
608 	struct rte_mbuf *txm;
609 	struct ntb_used tx_used[NTB_MAX_DESC_SIZE];
610 	volatile struct ntb_desc *tx_item;
611 	uint16_t tx_last, nb_segs, off, last_used, avail_cnt;
612 	uint16_t nb_mbufs = 0;
613 	uint16_t nb_tx = 0;
614 	uint64_t bytes = 0;
615 	void *buf_addr;
616 	int i;
617 
618 	if (unlikely(hw->ntb_ops->ioremap == NULL)) {
619 		NTB_LOG(ERR, "Ioremap not supported.");
620 		return nb_tx;
621 	}
622 
623 	if (unlikely(dev->started == 0 || hw->peer_dev_up == 0)) {
624 		NTB_LOG(DEBUG, "Link is not up.");
625 		return nb_tx;
626 	}
627 
628 	if (txq->nb_tx_free < txq->tx_free_thresh)
629 		ntb_enqueue_cleanup(txq);
630 
631 	off = NTB_XSTATS_NUM * ((size_t)context + 1);
632 	last_used = txq->last_used;
633 	avail_cnt = *txq->avail_cnt;/* Where to alloc next. */
634 	for (nb_tx = 0; nb_tx < count; nb_tx++) {
635 		txm = (struct rte_mbuf *)(buffers[nb_tx]->buf_addr);
636 		if (txm == NULL || txq->nb_tx_free < txm->nb_segs)
637 			break;
638 
639 		tx_last = (txq->last_used + txm->nb_segs - 1) &
640 			  (txq->nb_tx_desc - 1);
641 		nb_segs = txm->nb_segs;
642 		for (i = 0; i < nb_segs; i++) {
643 			/* Not enough ring space for tx. */
644 			if (txq->last_used == avail_cnt)
645 				goto end_of_tx;
646 			sw_ring[txq->last_used].mbuf = txm;
647 			tx_item = txq->tx_desc_ring + txq->last_used;
648 
649 			if (!tx_item->len) {
650 				(hw->ntb_xstats[NTB_TX_ERRS_ID + off])++;
651 				goto end_of_tx;
652 			}
653 			if (txm->data_len > tx_item->len) {
654 				NTB_LOG(ERR, "Data length exceeds buf length."
655 					" Only %u bytes will be transmitted.",
656 					tx_item->len);
657 				txm->data_len = tx_item->len;
658 			}
659 
660 			/* translate remote virtual addr to bar virtual addr */
661 			buf_addr = (*hw->ntb_ops->ioremap)(dev, tx_item->addr);
662 			if (buf_addr == NULL) {
663 				(hw->ntb_xstats[NTB_TX_ERRS_ID + off])++;
664 				NTB_LOG(ERR, "Null remap addr.");
665 				goto end_of_tx;
666 			}
667 			rte_memcpy(buf_addr, rte_pktmbuf_mtod(txm, void *),
668 				   txm->data_len);
669 
670 			tx_used[nb_mbufs].len = txm->data_len;
671 			tx_used[nb_mbufs++].flags = (txq->last_used ==
672 						    tx_last) ?
673 						    NTB_FLAG_EOP : 0;
674 
675 			/* update stats */
676 			bytes += txm->data_len;
677 
678 			txm = txm->next;
679 
680 			sw_ring[txq->last_used].next_id = (txq->last_used + 1) &
681 						  (txq->nb_tx_desc - 1);
682 			sw_ring[txq->last_used].last_id = tx_last;
683 			txq->last_used = (txq->last_used + 1) &
684 					 (txq->nb_tx_desc - 1);
685 		}
686 		txq->nb_tx_free -= nb_segs;
687 	}
688 
689 end_of_tx:
690 	if (nb_tx) {
691 		uint16_t nb1, nb2;
692 		if (nb_mbufs > txq->nb_tx_desc - last_used) {
693 			nb1 = txq->nb_tx_desc - last_used;
694 			nb2 = nb_mbufs - txq->nb_tx_desc + last_used;
695 		} else {
696 			nb1 = nb_mbufs;
697 			nb2 = 0;
698 		}
699 		rte_memcpy(txq->tx_used_ring + last_used, tx_used,
700 			   sizeof(struct ntb_used) * nb1);
701 		rte_memcpy(txq->tx_used_ring, tx_used + nb1,
702 			   sizeof(struct ntb_used) * nb2);
703 		rte_wmb();
704 		*txq->used_cnt = txq->last_used;
705 
706 		/* update queue stats */
707 		hw->ntb_xstats[NTB_TX_BYTES_ID + off] += bytes;
708 		hw->ntb_xstats[NTB_TX_PKTS_ID + off] += nb_tx;
709 	}
710 
711 	return nb_tx;
712 }
713 
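/*
 * Dequeue path: walk the local used ring written by the peer, chain the
 * segments of one packet until an entry with NTB_FLAG_EOP, then refill the
 * slot with a fresh mbuf and publish the new descriptor and avail_cnt to
 * the peer.
 */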
714 static int
715 ntb_dequeue_bufs(struct rte_rawdev *dev,
716 		 struct rte_rawdev_buf **buffers,
717 		 unsigned int count,
718 		 rte_rawdev_obj_t context)
719 {
720 	struct ntb_hw *hw = dev->dev_private;
721 	struct ntb_rx_queue *rxq = hw->rx_queues[(size_t)context];
722 	struct ntb_rx_entry *sw_ring = rxq->sw_ring;
723 	struct ntb_desc rx_desc[NTB_MAX_DESC_SIZE];
724 	struct rte_mbuf *first, *rxm_t;
725 	struct rte_mbuf *prev = NULL;
726 	volatile struct ntb_used *rx_item;
727 	uint16_t nb_mbufs = 0;
728 	uint16_t nb_rx = 0;
729 	uint64_t bytes = 0;
730 	uint16_t off, last_avail, used_cnt, used_nb;
731 	int i;
732 
733 	if (unlikely(dev->started == 0 || hw->peer_dev_up == 0)) {
734 		NTB_LOG(DEBUG, "Link is not up");
735 		return nb_rx;
736 	}
737 
738 	used_cnt = *rxq->used_cnt;
739 
740 	if (rxq->last_used == used_cnt)
741 		return nb_rx;
742 
743 	last_avail = rxq->last_avail;
744 	used_nb = (used_cnt - rxq->last_used) & (rxq->nb_rx_desc - 1);
745 	count = RTE_MIN(count, used_nb);
746 	for (nb_rx = 0; nb_rx < count; nb_rx++) {
747 		i = 0;
748 		while (true) {
749 			rx_item = rxq->rx_used_ring + rxq->last_used;
750 			rxm_t = sw_ring[rxq->last_used].mbuf;
751 			rxm_t->data_len = rx_item->len;
752 			rxm_t->data_off = RTE_PKTMBUF_HEADROOM;
753 			rxm_t->port = rxq->port_id;
754 
755 			if (!i) {
756 				rxm_t->nb_segs = 1;
757 				first = rxm_t;
758 				first->pkt_len = 0;
759 				buffers[nb_rx]->buf_addr = rxm_t;
760 			} else {
761 				prev->next = rxm_t;
762 				first->nb_segs++;
763 			}
764 
765 			prev = rxm_t;
766 			first->pkt_len += prev->data_len;
767 			rxq->last_used = (rxq->last_used + 1) &
768 					 (rxq->nb_rx_desc - 1);
769 
770 			/* alloc new mbuf */
771 			rxm_t = rte_mbuf_raw_alloc(rxq->mpool);
772 			if (unlikely(rxm_t == NULL)) {
773 				NTB_LOG(ERR, "recv alloc mbuf failed.");
774 				goto end_of_rx;
775 			}
776 			rxm_t->port = rxq->port_id;
777 			sw_ring[rxq->last_avail].mbuf = rxm_t;
778 			i++;
779 
780 			/* fill new desc */
781 			rx_desc[nb_mbufs].addr =
782 					rte_pktmbuf_mtod(rxm_t, size_t);
783 			rx_desc[nb_mbufs++].len = rxm_t->buf_len -
784 						  RTE_PKTMBUF_HEADROOM;
785 			rxq->last_avail = (rxq->last_avail + 1) &
786 					  (rxq->nb_rx_desc - 1);
787 
788 			if (rx_item->flags & NTB_FLAG_EOP)
789 				break;
790 		}
791 		/* update stats */
792 		bytes += first->pkt_len;
793 	}
794 
795 end_of_rx:
796 	if (nb_rx) {
797 		uint16_t nb1, nb2;
798 		if (nb_mbufs > rxq->nb_rx_desc - last_avail) {
799 			nb1 = rxq->nb_rx_desc - last_avail;
800 			nb2 = nb_mbufs - rxq->nb_rx_desc + last_avail;
801 		} else {
802 			nb1 = nb_mbufs;
803 			nb2 = 0;
804 		}
805 		rte_memcpy(rxq->rx_desc_ring + last_avail, rx_desc,
806 			   sizeof(struct ntb_desc) * nb1);
807 		rte_memcpy(rxq->rx_desc_ring, rx_desc + nb1,
808 			   sizeof(struct ntb_desc) * nb2);
809 		rte_wmb();
810 		*rxq->avail_cnt = rxq->last_avail;
811 
812 		/* update queue stats */
813 		off = NTB_XSTATS_NUM * ((size_t)context + 1);
814 		hw->ntb_xstats[NTB_RX_BYTES_ID + off] += bytes;
815 		hw->ntb_xstats[NTB_RX_PKTS_ID + off] += nb_rx;
816 		hw->ntb_xstats[NTB_RX_MISS_ID + off] += (count - nb_rx);
817 	}
818 
819 	return nb_rx;
820 }
821 
822 static int
823 ntb_dev_info_get(struct rte_rawdev *dev, rte_rawdev_obj_t dev_info,
824 		size_t dev_info_size)
825 {
826 	struct ntb_hw *hw = dev->dev_private;
827 	struct ntb_dev_info *info = dev_info;
828 
829 	if (dev_info_size != sizeof(*info)) {
830 		NTB_LOG(ERR, "Invalid size parameter to %s", __func__);
831 		return -EINVAL;
832 	}
833 
834 	info->mw_cnt = hw->mw_cnt;
835 	info->mw_size = hw->mw_size;
836 
837 	/**
838 	 * Intel hardware requires that the mapped memory base address be
839 	 * aligned to EMBARSZ and needs a contiguous memzone.
840 	 */
841 	info->mw_size_align = (uint8_t)(hw->pci_dev->id.vendor_id ==
842 					NTB_INTEL_VENDOR_ID);
843 
844 	if (!hw->queue_size || !hw->queue_pairs) {
845 		NTB_LOG(ERR, "No queue size and queue num assigned.");
846 		return -EAGAIN;
847 	}
848 
849 	hw->hdr_size_per_queue = RTE_ALIGN(sizeof(struct ntb_header) +
850 				hw->queue_size * sizeof(struct ntb_desc) +
851 				hw->queue_size * sizeof(struct ntb_used),
852 				RTE_CACHE_LINE_SIZE);
853 	info->ntb_hdr_size = hw->hdr_size_per_queue * hw->queue_pairs;
854 
855 	return 0;
856 }
857 
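/*
 * A rough sketch of the bring-up sequence expected from an application
 * (see the ntb sample application for the real thing): set the
 * NTB_QUEUE_NUM_NAME and NTB_QUEUE_SZ_NAME attributes, query the device
 * info above to learn ntb_hdr_size and mw_size, reserve memzones matching
 * those sizes (aligned to mw_size when mw_size_align is set), then
 * configure with the memzone list, set up each queue pair and start the
 * device once the peer is up.
 */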
858 static int
859 ntb_dev_configure(const struct rte_rawdev *dev, rte_rawdev_obj_t config,
860 		size_t config_size)
861 {
862 	struct ntb_dev_config *conf = config;
863 	struct ntb_hw *hw = dev->dev_private;
864 	uint32_t xstats_num;
865 	int ret;
866 
867 	if (conf == NULL || config_size != sizeof(*conf))
868 		return -EINVAL;
869 
870 	hw->queue_pairs	= conf->num_queues;
871 	hw->queue_size = conf->queue_size;
872 	hw->used_mw_num = conf->mz_num;
873 	hw->mz = conf->mz_list;
874 	hw->rx_queues = rte_zmalloc("ntb_rx_queues",
875 			sizeof(struct ntb_rx_queue *) * hw->queue_pairs, 0);
876 	hw->tx_queues = rte_zmalloc("ntb_tx_queues",
877 			sizeof(struct ntb_tx_queue *) * hw->queue_pairs, 0);
878 	/* First total stats, then per queue stats. */
879 	xstats_num = (hw->queue_pairs + 1) * NTB_XSTATS_NUM;
880 	hw->ntb_xstats = rte_zmalloc("ntb_xstats", xstats_num *
881 				     sizeof(uint64_t), 0);
882 	hw->ntb_xstats_off = rte_zmalloc("ntb_xstats_off", xstats_num *
883 					 sizeof(uint64_t), 0);
884 
885 	/* Start handshake with the peer. */
886 	ret = ntb_handshake_work(dev);
887 	if (ret < 0) {
888 		rte_free(hw->rx_queues);
889 		rte_free(hw->tx_queues);
890 		hw->rx_queues = NULL;
891 		hw->tx_queues = NULL;
892 		return ret;
893 	}
894 
895 	return 0;
896 }
897 
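/*
 * Starting the device checks that the peer advertised the same ring size
 * and number of queue pairs in its scratchpads, and records the peer's
 * memzone base addresses so that ioremap() can translate descriptor
 * addresses on the transmit path.
 */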
898 static int
899 ntb_dev_start(struct rte_rawdev *dev)
900 {
901 	struct ntb_hw *hw = dev->dev_private;
902 	uint32_t peer_base_l, peer_val;
903 	uint64_t peer_base_h;
904 	uint32_t i;
905 	int ret;
906 
907 	if (!hw->link_status || !hw->peer_dev_up)
908 		return -EINVAL;
909 
910 	/* Set total stats. */
911 	for (i = 0; i < NTB_XSTATS_NUM; i++) {
912 		hw->ntb_xstats[i] = 0;
913 		hw->ntb_xstats_off[i] = 0;
914 	}
915 
916 	for (i = 0; i < hw->queue_pairs; i++) {
917 		ret = ntb_queue_init(dev, i);
918 		if (ret) {
919 			NTB_LOG(ERR, "Failed to init queue.");
920 			goto err_q_init;
921 		}
922 	}
923 
924 	hw->peer_mw_base = rte_zmalloc("ntb_peer_mw_base", hw->mw_cnt *
925 					sizeof(uint64_t), 0);
926 
927 	if (hw->ntb_ops->spad_read == NULL) {
928 		ret = -ENOTSUP;
929 		goto err_up;
930 	}
931 
932 	peer_val = (*hw->ntb_ops->spad_read)(dev, SPAD_Q_SZ, 0);
933 	if (peer_val != hw->queue_size) {
934 		NTB_LOG(ERR, "Inconsistent queue size! (local: %u peer: %u)",
935 			hw->queue_size, peer_val);
936 		ret = -EINVAL;
937 		goto err_up;
938 	}
939 
940 	peer_val = (*hw->ntb_ops->spad_read)(dev, SPAD_NUM_QPS, 0);
941 	if (peer_val != hw->queue_pairs) {
942 		NTB_LOG(ERR, "Inconsistent number of queues! (local: %u peer:"
943 			" %u)", hw->queue_pairs, peer_val);
944 		ret = -EINVAL;
945 		goto err_up;
946 	}
947 
948 	hw->peer_used_mws = (*hw->ntb_ops->spad_read)(dev, SPAD_USED_MWS, 0);
949 
950 	for (i = 0; i < hw->peer_used_mws; i++) {
951 		peer_base_h = (*hw->ntb_ops->spad_read)(dev,
952 				SPAD_MW0_BA_H + 2 * i, 0);
953 		peer_base_l = (*hw->ntb_ops->spad_read)(dev,
954 				SPAD_MW0_BA_L + 2 * i, 0);
955 		hw->peer_mw_base[i] = (peer_base_h << 32) + peer_base_l;
956 	}
957 
958 	dev->started = 1;
959 
960 	return 0;
961 
962 err_up:
963 	rte_free(hw->peer_mw_base);
964 err_q_init:
965 	for (i = 0; i < hw->queue_pairs; i++) {
966 		ntb_rxq_release_mbufs(hw->rx_queues[i]);
967 		ntb_txq_release_mbufs(hw->tx_queues[i]);
968 	}
969 
970 	return ret;
971 }
972 
973 static void
974 ntb_dev_stop(struct rte_rawdev *dev)
975 {
976 	struct ntb_hw *hw = dev->dev_private;
977 	uint32_t time_out;
978 	int status, i;
979 
980 	if (!hw->peer_dev_up)
981 		goto clean;
982 
983 	ntb_link_cleanup(dev);
984 
985 	/* Notify the peer that device will be down. */
986 	if (hw->ntb_ops->peer_db_set == NULL) {
987 		NTB_LOG(ERR, "Peer doorbell setting is not supported.");
988 		return;
989 	}
990 	status = (*hw->ntb_ops->peer_db_set)(dev, 1);
991 	if (status) {
992 		NTB_LOG(ERR, "Failed to tell peer device is down.");
993 		return;
994 	}
995 
996 	/*
997 	 * Set the timeout to 1 s in case the peer is stopped accidentally
998 	 * without any notification.
999 	 */
1000 	time_out = 1000000;
1001 
1002 	/* Wait for the cleanup work to be done before clearing the db mask. */
1003 	while (hw->peer_dev_up && time_out) {
1004 		time_out -= 10;
1005 		rte_delay_us(10);
1006 	}
1007 
1008 clean:
1009 	/* Clear doorbells mask. */
1010 	if (hw->ntb_ops->db_set_mask == NULL) {
1011 		NTB_LOG(ERR, "Doorbell mask setting is not supported.");
1012 		return;
1013 	}
1014 	status = (*hw->ntb_ops->db_set_mask)(dev,
1015 				(((uint64_t)1 << hw->db_cnt) - 1));
1016 	if (status)
1017 		NTB_LOG(ERR, "Failed to clear doorbells.");
1018 
1019 	for (i = 0; i < hw->queue_pairs; i++) {
1020 		ntb_rxq_release_mbufs(hw->rx_queues[i]);
1021 		ntb_txq_release_mbufs(hw->tx_queues[i]);
1022 	}
1023 
1024 	dev->started = 0;
1025 }
1026 
1027 static int
1028 ntb_dev_close(struct rte_rawdev *dev)
1029 {
1030 	struct ntb_hw *hw = dev->dev_private;
1031 	struct rte_intr_handle *intr_handle;
1032 	int i;
1033 
1034 	if (dev->started)
1035 		ntb_dev_stop(dev);
1036 
1037 	/* free queues */
1038 	for (i = 0; i < hw->queue_pairs; i++)
1039 		ntb_queue_release(dev, i);
1040 	hw->queue_pairs = 0;
1041 
1042 	intr_handle = &hw->pci_dev->intr_handle;
1043 	/* Clean datapath event and vec mapping */
1044 	rte_intr_efd_disable(intr_handle);
1045 	if (intr_handle->intr_vec) {
1046 		rte_free(intr_handle->intr_vec);
1047 		intr_handle->intr_vec = NULL;
1048 	}
1049 	/* Disable uio intr before callback unregister */
1050 	rte_intr_disable(intr_handle);
1051 
1052 	/* Unregister callback func to eal lib */
1053 	rte_intr_callback_unregister(intr_handle,
1054 				     ntb_dev_intr_handler, dev);
1055 
1056 	return 0;
1057 }
1058 
1059 static int
1060 ntb_dev_reset(struct rte_rawdev *rawdev __rte_unused)
1061 {
1062 	return 0;
1063 }
1064 
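/*
 * Attributes are the control path exposed to applications through
 * rte_rawdev_set_attr()/rte_rawdev_get_attr(). As an illustration (not
 * taken from this file), a user scratchpad could pass a value to the peer
 * roughly like this:
 *
 *   On one side:   rte_rawdev_set_attr(dev_id, NTB_SPAD_USER "0", 0x1234);
 *   On the other:  rte_rawdev_get_attr(dev_id, NTB_SPAD_USER "0", &val);
 *
 * The trailing number after the NTB_SPAD_USER prefix selects an entry of
 * hw->spad_user_list below.
 */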
1065 static int
1066 ntb_attr_set(struct rte_rawdev *dev, const char *attr_name,
1067 	     uint64_t attr_value)
1068 {
1069 	struct ntb_hw *hw;
1070 	int index;
1071 
1072 	if (dev == NULL || attr_name == NULL) {
1073 		NTB_LOG(ERR, "Invalid arguments for setting attributes");
1074 		return -EINVAL;
1075 	}
1076 
1077 	hw = dev->dev_private;
1078 
1079 	if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
1080 		if (hw->ntb_ops->spad_write == NULL)
1081 			return -ENOTSUP;
1082 		index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
1083 		(*hw->ntb_ops->spad_write)(dev, hw->spad_user_list[index],
1084 					   1, attr_value);
1085 		NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")",
1086 			attr_name, attr_value);
1087 		return 0;
1088 	}
1089 
1090 	if (!strncmp(attr_name, NTB_QUEUE_SZ_NAME, NTB_ATTR_NAME_LEN)) {
1091 		hw->queue_size = attr_value;
1092 		NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")",
1093 			attr_name, attr_value);
1094 		return 0;
1095 	}
1096 
1097 	if (!strncmp(attr_name, NTB_QUEUE_NUM_NAME, NTB_ATTR_NAME_LEN)) {
1098 		hw->queue_pairs = attr_value;
1099 		NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")",
1100 			attr_name, attr_value);
1101 		return 0;
1102 	}
1103 
1104 	/* Attribute not found. */
1105 	NTB_LOG(ERR, "Attribute not found.");
1106 	return -EINVAL;
1107 }
1108 
1109 static int
1110 ntb_attr_get(struct rte_rawdev *dev, const char *attr_name,
1111 	     uint64_t *attr_value)
1112 {
1113 	struct ntb_hw *hw;
1114 	int index;
1115 
1116 	if (dev == NULL || attr_name == NULL || attr_value == NULL) {
1117 		NTB_LOG(ERR, "Invalid arguments for getting attributes");
1118 		return -EINVAL;
1119 	}
1120 
1121 	hw = dev->dev_private;
1122 
1123 	if (!strncmp(attr_name, NTB_TOPO_NAME, NTB_ATTR_NAME_LEN)) {
1124 		*attr_value = hw->topo;
1125 		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
1126 			attr_name, *attr_value);
1127 		return 0;
1128 	}
1129 
1130 	if (!strncmp(attr_name, NTB_LINK_STATUS_NAME, NTB_ATTR_NAME_LEN)) {
1131 		/* hw->link_status only indicates hw link status. */
1132 		*attr_value = hw->link_status && hw->peer_dev_up;
1133 		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
1134 			attr_name, *attr_value);
1135 		return 0;
1136 	}
1137 
1138 	if (!strncmp(attr_name, NTB_SPEED_NAME, NTB_ATTR_NAME_LEN)) {
1139 		*attr_value = hw->link_speed;
1140 		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
1141 			attr_name, *attr_value);
1142 		return 0;
1143 	}
1144 
1145 	if (!strncmp(attr_name, NTB_WIDTH_NAME, NTB_ATTR_NAME_LEN)) {
1146 		*attr_value = hw->link_width;
1147 		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
1148 			attr_name, *attr_value);
1149 		return 0;
1150 	}
1151 
1152 	if (!strncmp(attr_name, NTB_MW_CNT_NAME, NTB_ATTR_NAME_LEN)) {
1153 		*attr_value = hw->mw_cnt;
1154 		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
1155 			attr_name, *attr_value);
1156 		return 0;
1157 	}
1158 
1159 	if (!strncmp(attr_name, NTB_DB_CNT_NAME, NTB_ATTR_NAME_LEN)) {
1160 		*attr_value = hw->db_cnt;
1161 		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
1162 			attr_name, *attr_value);
1163 		return 0;
1164 	}
1165 
1166 	if (!strncmp(attr_name, NTB_SPAD_CNT_NAME, NTB_ATTR_NAME_LEN)) {
1167 		*attr_value = hw->spad_cnt;
1168 		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
1169 			attr_name, *attr_value);
1170 		return 0;
1171 	}
1172 
1173 	if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
1174 		if (hw->ntb_ops->spad_read == NULL)
1175 			return -ENOTSUP;
1176 		index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
1177 		*attr_value = (*hw->ntb_ops->spad_read)(dev,
1178 				hw->spad_user_list[index], 0);
1179 		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
1180 			attr_name, *attr_value);
1181 		return 0;
1182 	}
1183 
1184 	/* Attribute not found. */
1185 	NTB_LOG(ERR, "Attribute not found.");
1186 	return -EINVAL;
1187 }
1188 
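/*
 * Counters in hw->ntb_xstats are never cleared on the fast path; a reset
 * only snapshots the current value into hw->ntb_xstats_off, and readers
 * report the difference, taking a possible uint64_t wrap-around into
 * account.
 */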
1189 static inline uint64_t
1190 ntb_stats_update(uint64_t offset, uint64_t stat)
1191 {
1192 	if (stat >= offset)
1193 		return (stat - offset);
1194 	else
1195 		return (uint64_t)(((uint64_t)-1) - offset + stat + 1);
1196 }
1197 
1198 static int
1199 ntb_xstats_get(const struct rte_rawdev *dev,
1200 	       const unsigned int ids[],
1201 	       uint64_t values[],
1202 	       unsigned int n)
1203 {
1204 	struct ntb_hw *hw = dev->dev_private;
1205 	uint32_t i, j, off, xstats_num;
1206 
1207 	/* Calculate total stats of all queues. */
1208 	for (i = 0; i < NTB_XSTATS_NUM; i++) {
1209 		hw->ntb_xstats[i] = 0;
1210 		for (j = 0; j < hw->queue_pairs; j++) {
1211 			off = NTB_XSTATS_NUM * (j + 1) + i;
1212 			hw->ntb_xstats[i] +=
1213 			ntb_stats_update(hw->ntb_xstats_off[off],
1214 					 hw->ntb_xstats[off]);
1215 		}
1216 	}
1217 
1218 	xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
1219 	for (i = 0; i < n && ids[i] < xstats_num; i++) {
1220 		if (ids[i] < NTB_XSTATS_NUM)
1221 			values[i] = hw->ntb_xstats[ids[i]];
1222 		else
1223 			values[i] =
1224 			ntb_stats_update(hw->ntb_xstats_off[ids[i]],
1225 					 hw->ntb_xstats[ids[i]]);
1226 	}
1227 
1228 	return i;
1229 }
1230 
1231 static int
1232 ntb_xstats_get_names(const struct rte_rawdev *dev,
1233 		     struct rte_rawdev_xstats_name *xstats_names,
1234 		     unsigned int size)
1235 {
1236 	struct ntb_hw *hw = dev->dev_private;
1237 	uint32_t xstats_num, i, j, off;
1238 
1239 	xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
1240 	if (xstats_names == NULL || size < xstats_num)
1241 		return xstats_num;
1242 
1243 	/* Total stats names */
1244 	memcpy(xstats_names, ntb_xstats_names, sizeof(ntb_xstats_names));
1245 
1246 	/* Queue stats names */
1247 	for (i = 0; i < hw->queue_pairs; i++) {
1248 		for (j = 0; j < NTB_XSTATS_NUM; j++) {
1249 			off = j + (i + 1) * NTB_XSTATS_NUM;
1250 			snprintf(xstats_names[off].name,
1251 				sizeof(xstats_names[0].name),
1252 				"%s_q%u", ntb_xstats_names[j].name, i);
1253 		}
1254 	}
1255 
1256 	return xstats_num;
1257 }
1258 
1259 static uint64_t
1260 ntb_xstats_get_by_name(const struct rte_rawdev *dev,
1261 		       const char *name, unsigned int *id)
1262 {
1263 	struct rte_rawdev_xstats_name *xstats_names;
1264 	struct ntb_hw *hw = dev->dev_private;
1265 	uint32_t xstats_num, i, j, off;
1266 
1267 	if (name == NULL)
1268 		return -EINVAL;
1269 
1270 	xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
1271 	xstats_names = rte_zmalloc("ntb_stats_name",
1272 				   sizeof(struct rte_rawdev_xstats_name) *
1273 				   xstats_num, 0);
1274 	ntb_xstats_get_names(dev, xstats_names, xstats_num);
1275 
1276 	/* Calculate total stats of all queues. */
1277 	for (i = 0; i < NTB_XSTATS_NUM; i++) {
1278 		for (j = 0; j < hw->queue_pairs; j++) {
1279 			off = NTB_XSTATS_NUM * (j + 1) + i;
1280 			hw->ntb_xstats[i] +=
1281 			ntb_stats_update(hw->ntb_xstats_off[off],
1282 					 hw->ntb_xstats[off]);
1283 		}
1284 	}
1285 
1286 	for (i = 0; i < xstats_num; i++) {
1287 		if (!strncmp(name, xstats_names[i].name,
1288 		    RTE_RAW_DEV_XSTATS_NAME_SIZE)) {
1289 			*id = i;
1290 			rte_free(xstats_names);
1291 			if (i < NTB_XSTATS_NUM)
1292 				return hw->ntb_xstats[i];
1293 			else
1294 				return ntb_stats_update(hw->ntb_xstats_off[i],
1295 							hw->ntb_xstats[i]);
1296 		}
1297 	}
1298 
1299 	NTB_LOG(ERR, "Cannot find the xstats name.");
1300 
1301 	return -EINVAL;
1302 }
1303 
1304 static int
1305 ntb_xstats_reset(struct rte_rawdev *dev,
1306 		 const uint32_t ids[],
1307 		 uint32_t nb_ids)
1308 {
1309 	struct ntb_hw *hw = dev->dev_private;
1310 	uint32_t i, j, off, xstats_num;
1311 
1312 	xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
1313 	for (i = 0; i < nb_ids && ids[i] < xstats_num; i++) {
1314 		if (ids[i] < NTB_XSTATS_NUM) {
1315 			for (j = 0; j < hw->queue_pairs; j++) {
1316 				off = NTB_XSTATS_NUM * (j + 1) + ids[i];
1317 				hw->ntb_xstats_off[off] = hw->ntb_xstats[off];
1318 			}
1319 		} else {
1320 			hw->ntb_xstats_off[ids[i]] = hw->ntb_xstats[ids[i]];
1321 		}
1322 	}
1323 
1324 	return i;
1325 }
1326 
1327 static const struct rte_rawdev_ops ntb_ops = {
1328 	.dev_info_get         = ntb_dev_info_get,
1329 	.dev_configure        = ntb_dev_configure,
1330 	.dev_start            = ntb_dev_start,
1331 	.dev_stop             = ntb_dev_stop,
1332 	.dev_close            = ntb_dev_close,
1333 	.dev_reset            = ntb_dev_reset,
1334 
1335 	.queue_def_conf       = ntb_queue_conf_get,
1336 	.queue_setup          = ntb_queue_setup,
1337 	.queue_release        = ntb_queue_release,
1338 	.queue_count          = ntb_queue_count,
1339 
1340 	.enqueue_bufs         = ntb_enqueue_bufs,
1341 	.dequeue_bufs         = ntb_dequeue_bufs,
1342 
1343 	.attr_get             = ntb_attr_get,
1344 	.attr_set             = ntb_attr_set,
1345 
1346 	.xstats_get           = ntb_xstats_get,
1347 	.xstats_get_names     = ntb_xstats_get_names,
1348 	.xstats_get_by_name   = ntb_xstats_get_by_name,
1349 	.xstats_reset         = ntb_xstats_reset,
1350 };
1351 
1352 static int
1353 ntb_init_hw(struct rte_rawdev *dev, struct rte_pci_device *pci_dev)
1354 {
1355 	struct ntb_hw *hw = dev->dev_private;
1356 	struct rte_intr_handle *intr_handle;
1357 	int ret, i;
1358 
1359 	hw->pci_dev = pci_dev;
1360 	hw->peer_dev_up = 0;
1361 	hw->link_status = NTB_LINK_DOWN;
1362 	hw->link_speed = NTB_SPEED_NONE;
1363 	hw->link_width = NTB_WIDTH_NONE;
1364 
1365 	switch (pci_dev->id.device_id) {
1366 	case NTB_INTEL_DEV_ID_B2B_SKX:
1367 	case NTB_INTEL_DEV_ID_B2B_ICX:
1368 		hw->ntb_ops = &intel_ntb_ops;
1369 		break;
1370 	default:
1371 		NTB_LOG(ERR, "Not supported device.");
1372 		return -EINVAL;
1373 	}
1374 
1375 	if (hw->ntb_ops->ntb_dev_init == NULL)
1376 		return -ENOTSUP;
1377 	ret = (*hw->ntb_ops->ntb_dev_init)(dev);
1378 	if (ret) {
1379 		NTB_LOG(ERR, "Unable to init ntb dev.");
1380 		return ret;
1381 	}
1382 
1383 	if (hw->ntb_ops->set_link == NULL)
1384 		return -ENOTSUP;
1385 	ret = (*hw->ntb_ops->set_link)(dev, 1);
1386 	if (ret)
1387 		return ret;
1388 
1389 	/* Init doorbell. */
1390 	hw->db_valid_mask = RTE_LEN2MASK(hw->db_cnt, uint64_t);
1391 
1392 	intr_handle = &pci_dev->intr_handle;
1393 	/* Register callback func to eal lib */
1394 	rte_intr_callback_register(intr_handle,
1395 				   ntb_dev_intr_handler, dev);
1396 
1397 	ret = rte_intr_efd_enable(intr_handle, hw->db_cnt);
1398 	if (ret)
1399 		return ret;
1400 
1401 	/* The interrupt for each doorbell is already mapped by default on
1402 	 * Intel gen3 hardware: doorbells map to MSI-X vectors 1-32 and the
1403 	 * hardware interrupt maps to vector 0. Map all of them to 0 for uio.
1404 	 */
1405 	if (!rte_intr_cap_multiple(intr_handle)) {
1406 		for (i = 0; i < hw->db_cnt; i++) {
1407 			if (hw->ntb_ops->vector_bind == NULL)
1408 				return -ENOTSUP;
1409 			ret = (*hw->ntb_ops->vector_bind)(dev, i, 0);
1410 			if (ret)
1411 				return ret;
1412 		}
1413 	}
1414 
1415 	if (hw->ntb_ops->db_set_mask == NULL ||
1416 	    hw->ntb_ops->peer_db_set == NULL) {
1417 		NTB_LOG(ERR, "Doorbell is not supported.");
1418 		return -ENOTSUP;
1419 	}
1420 	hw->db_mask = 0;
1421 	ret = (*hw->ntb_ops->db_set_mask)(dev, hw->db_mask);
1422 	if (ret) {
1423 		NTB_LOG(ERR, "Unable to enable intr for all dbs.");
1424 		return ret;
1425 	}
1426 
1427 	/* enable uio intr after callback register */
1428 	rte_intr_enable(intr_handle);
1429 
1430 	return ret;
1431 }
1432 
1433 static int
1434 ntb_create(struct rte_pci_device *pci_dev, int socket_id)
1435 {
1436 	char name[RTE_RAWDEV_NAME_MAX_LEN];
1437 	struct rte_rawdev *rawdev = NULL;
1438 	int ret;
1439 
1440 	if (pci_dev == NULL) {
1441 		NTB_LOG(ERR, "Invalid pci_dev.");
1442 		return -EINVAL;
1443 	}
1444 
1445 	memset(name, 0, sizeof(name));
1446 	snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
1447 		 pci_dev->addr.bus, pci_dev->addr.devid,
1448 		 pci_dev->addr.function);
1449 
1450 	NTB_LOG(INFO, "Init %s on NUMA node %d", name, socket_id);
1451 
1452 	/* Allocate device structure. */
1453 	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct ntb_hw),
1454 					 socket_id);
1455 	if (rawdev == NULL) {
1456 		NTB_LOG(ERR, "Unable to allocate rawdev.");
1457 		return -EINVAL;
1458 	}
1459 
1460 	rawdev->dev_ops = &ntb_ops;
1461 	rawdev->device = &pci_dev->device;
1462 	rawdev->driver_name = pci_dev->driver->driver.name;
1463 
1464 	ret = ntb_init_hw(rawdev, pci_dev);
1465 	if (ret < 0) {
1466 		NTB_LOG(ERR, "Unable to init ntb hw.");
1467 		goto fail;
1468 	}
1469 
1470 	return ret;
1471 
1472 fail:
1473 	if (rawdev != NULL)
1474 		rte_rawdev_pmd_release(rawdev);
1475 
1476 	return ret;
1477 }
1478 
1479 static int
1480 ntb_destroy(struct rte_pci_device *pci_dev)
1481 {
1482 	char name[RTE_RAWDEV_NAME_MAX_LEN];
1483 	struct rte_rawdev *rawdev;
1484 	int ret;
1485 
1486 	if (pci_dev == NULL) {
1487 		NTB_LOG(ERR, "Invalid pci_dev.");
1488 		ret = -EINVAL;
1489 		return ret;
1490 	}
1491 
1492 	memset(name, 0, sizeof(name));
1493 	snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
1494 		 pci_dev->addr.bus, pci_dev->addr.devid,
1495 		 pci_dev->addr.function);
1496 
1497 	NTB_LOG(INFO, "Closing %s on NUMA node %d", name, rte_socket_id());
1498 
1499 	rawdev = rte_rawdev_pmd_get_named_dev(name);
1500 	if (rawdev == NULL) {
1501 		NTB_LOG(ERR, "Invalid device name (%s)", name);
1502 		ret = -EINVAL;
1503 		return ret;
1504 	}
1505 
1506 	ret = rte_rawdev_pmd_release(rawdev);
1507 	if (ret)
1508 		NTB_LOG(ERR, "Failed to destroy ntb rawdev.");
1509 
1510 	return ret;
1511 }
1512 
1513 static int
1514 ntb_probe(struct rte_pci_driver *pci_drv __rte_unused,
1515 	struct rte_pci_device *pci_dev)
1516 {
1517 	return ntb_create(pci_dev, rte_socket_id());
1518 }
1519 
1520 static int
1521 ntb_remove(struct rte_pci_device *pci_dev)
1522 {
1523 	return ntb_destroy(pci_dev);
1524 }
1525 
1526 
1527 static struct rte_pci_driver rte_ntb_pmd = {
1528 	.id_table = pci_id_ntb_map,
1529 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_WC_ACTIVATE,
1530 	.probe = ntb_probe,
1531 	.remove = ntb_remove,
1532 };
1533 
1534 RTE_PMD_REGISTER_PCI(raw_ntb, rte_ntb_pmd);
1535 RTE_PMD_REGISTER_PCI_TABLE(raw_ntb, pci_id_ntb_map);
1536 RTE_PMD_REGISTER_KMOD_DEP(raw_ntb, "* igb_uio | uio_pci_generic | vfio-pci");
1537 RTE_LOG_REGISTER(ntb_logtype, pmd.raw.ntb, INFO);
1538