xref: /dpdk/drivers/raw/ntb/ntb.c (revision 089e5ed727a15da2729cfee9b63533dd120bd04c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019 Intel Corporation.
3  */
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <errno.h>
8 
9 #include <rte_common.h>
10 #include <rte_lcore.h>
11 #include <rte_cycles.h>
12 #include <rte_eal.h>
13 #include <rte_log.h>
14 #include <rte_pci.h>
15 #include <rte_bus_pci.h>
16 #include <rte_memzone.h>
17 #include <rte_memcpy.h>
18 #include <rte_rawdev.h>
19 #include <rte_rawdev_pmd.h>
20 
21 #include "ntb_hw_intel.h"
22 #include "ntb.h"
23 
/* Log type id, registered at constructor time in ntb_init_log(). */
int ntb_logtype;

/* PCI ids handled by this PMD; currently Intel Skylake B2B NTB only. */
static const struct rte_pci_id pci_id_ntb_map[] = {
	{ RTE_PCI_DEVICE(NTB_INTEL_VENDOR_ID, NTB_INTEL_DEV_ID_B2B_SKX) },
	{ .vendor_id = 0, /* sentinel */ },
};
30 
31 static int
32 ntb_set_mw(struct rte_rawdev *dev, int mw_idx, uint64_t mw_size)
33 {
34 	struct ntb_hw *hw = dev->dev_private;
35 	char mw_name[RTE_MEMZONE_NAMESIZE];
36 	const struct rte_memzone *mz;
37 	int ret = 0;
38 
39 	if (hw->ntb_ops->mw_set_trans == NULL) {
40 		NTB_LOG(ERR, "Not supported to set mw.");
41 		return -ENOTSUP;
42 	}
43 
44 	snprintf(mw_name, sizeof(mw_name), "ntb_%d_mw_%d",
45 		 dev->dev_id, mw_idx);
46 
47 	mz = rte_memzone_lookup(mw_name);
48 	if (mz)
49 		return 0;
50 
51 	/**
52 	 * Hardware requires that mapped memory base address should be
53 	 * aligned with EMBARSZ and needs continuous memzone.
54 	 */
55 	mz = rte_memzone_reserve_aligned(mw_name, mw_size, dev->socket_id,
56 				RTE_MEMZONE_IOVA_CONTIG, hw->mw_size[mw_idx]);
57 	if (!mz) {
58 		NTB_LOG(ERR, "Cannot allocate aligned memzone.");
59 		return -EIO;
60 	}
61 	hw->mz[mw_idx] = mz;
62 
63 	ret = (*hw->ntb_ops->mw_set_trans)(dev, mw_idx, mz->iova, mw_size);
64 	if (ret) {
65 		NTB_LOG(ERR, "Cannot set mw translation.");
66 		return ret;
67 	}
68 
69 	return ret;
70 }
71 
72 static void
73 ntb_link_cleanup(struct rte_rawdev *dev)
74 {
75 	struct ntb_hw *hw = dev->dev_private;
76 	int status, i;
77 
78 	if (hw->ntb_ops->spad_write == NULL ||
79 	    hw->ntb_ops->mw_set_trans == NULL) {
80 		NTB_LOG(ERR, "Not supported to clean up link.");
81 		return;
82 	}
83 
84 	/* Clean spad registers. */
85 	for (i = 0; i < hw->spad_cnt; i++) {
86 		status = (*hw->ntb_ops->spad_write)(dev, i, 0, 0);
87 		if (status)
88 			NTB_LOG(ERR, "Failed to clean local spad.");
89 	}
90 
91 	/* Clear mw so that peer cannot access local memory.*/
92 	for (i = 0; i < hw->mw_cnt; i++) {
93 		status = (*hw->ntb_ops->mw_set_trans)(dev, i, 0, 0);
94 		if (status)
95 			NTB_LOG(ERR, "Failed to clean mw.");
96 	}
97 }
98 
99 static void
100 ntb_dev_intr_handler(void *param)
101 {
102 	struct rte_rawdev *dev = (struct rte_rawdev *)param;
103 	struct ntb_hw *hw = dev->dev_private;
104 	uint32_t mw_size_h, mw_size_l;
105 	uint64_t db_bits = 0;
106 	int i = 0;
107 
108 	if (hw->ntb_ops->db_read == NULL ||
109 	    hw->ntb_ops->db_clear == NULL ||
110 	    hw->ntb_ops->peer_db_set == NULL) {
111 		NTB_LOG(ERR, "Doorbell is not supported.");
112 		return;
113 	}
114 
115 	db_bits = (*hw->ntb_ops->db_read)(dev);
116 	if (!db_bits)
117 		NTB_LOG(ERR, "No doorbells");
118 
119 	/* Doorbell 0 is for peer device ready. */
120 	if (db_bits & 1) {
121 		NTB_LOG(DEBUG, "DB0: Peer device is up.");
122 		/* Clear received doorbell. */
123 		(*hw->ntb_ops->db_clear)(dev, 1);
124 
125 		/**
126 		 * Peer dev is already up. All mw settings are already done.
127 		 * Skip them.
128 		 */
129 		if (hw->peer_dev_up)
130 			return;
131 
132 		if (hw->ntb_ops->spad_read == NULL ||
133 		    hw->ntb_ops->spad_write == NULL) {
134 			NTB_LOG(ERR, "Scratchpad is not supported.");
135 			return;
136 		}
137 
138 		hw->peer_mw_cnt = (*hw->ntb_ops->spad_read)
139 				  (dev, SPAD_NUM_MWS, 0);
140 		hw->peer_mw_size = rte_zmalloc("uint64_t",
141 				   hw->peer_mw_cnt * sizeof(uint64_t), 0);
142 		for (i = 0; i < hw->mw_cnt; i++) {
143 			mw_size_h = (*hw->ntb_ops->spad_read)
144 				    (dev, SPAD_MW0_SZ_H + 2 * i, 0);
145 			mw_size_l = (*hw->ntb_ops->spad_read)
146 				    (dev, SPAD_MW0_SZ_L + 2 * i, 0);
147 			hw->peer_mw_size[i] = ((uint64_t)mw_size_h << 32) |
148 					      mw_size_l;
149 			NTB_LOG(DEBUG, "Peer %u mw size: 0x%"PRIx64"", i,
150 					hw->peer_mw_size[i]);
151 		}
152 
153 		hw->peer_dev_up = 1;
154 
155 		/**
156 		 * Handshake with peer. Spad_write only works when both
157 		 * devices are up. So write spad again when db is received.
158 		 * And set db again for the later device who may miss
159 		 * the 1st db.
160 		 */
161 		for (i = 0; i < hw->mw_cnt; i++) {
162 			(*hw->ntb_ops->spad_write)(dev, SPAD_NUM_MWS,
163 						   1, hw->mw_cnt);
164 			mw_size_h = hw->mw_size[i] >> 32;
165 			(*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_H + 2 * i,
166 						   1, mw_size_h);
167 
168 			mw_size_l = hw->mw_size[i];
169 			(*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_L + 2 * i,
170 						   1, mw_size_l);
171 		}
172 		(*hw->ntb_ops->peer_db_set)(dev, 0);
173 
174 		/* To get the link info. */
175 		if (hw->ntb_ops->get_link_status == NULL) {
176 			NTB_LOG(ERR, "Not supported to get link status.");
177 			return;
178 		}
179 		(*hw->ntb_ops->get_link_status)(dev);
180 		NTB_LOG(INFO, "Link is up. Link speed: %u. Link width: %u",
181 			hw->link_speed, hw->link_width);
182 		return;
183 	}
184 
185 	if (db_bits & (1 << 1)) {
186 		NTB_LOG(DEBUG, "DB1: Peer device is down.");
187 		/* Clear received doorbell. */
188 		(*hw->ntb_ops->db_clear)(dev, 2);
189 
190 		/* Peer device will be down, So clean local side too. */
191 		ntb_link_cleanup(dev);
192 
193 		hw->peer_dev_up = 0;
194 		/* Response peer's dev_stop request. */
195 		(*hw->ntb_ops->peer_db_set)(dev, 2);
196 		return;
197 	}
198 
199 	if (db_bits & (1 << 2)) {
200 		NTB_LOG(DEBUG, "DB2: Peer device agrees dev to be down.");
201 		/* Clear received doorbell. */
202 		(*hw->ntb_ops->db_clear)(dev, (1 << 2));
203 		hw->peer_dev_up = 0;
204 		return;
205 	}
206 }
207 
/* Queue default-config query: stub, queue model is not implemented yet. */
static void
ntb_queue_conf_get(struct rte_rawdev *dev __rte_unused,
		   uint16_t queue_id __rte_unused,
		   rte_rawdev_obj_t queue_conf __rte_unused)
{
}
214 
/* Queue setup: stub, always succeeds; real queues are still TODO. */
static int
ntb_queue_setup(struct rte_rawdev *dev __rte_unused,
		uint16_t queue_id __rte_unused,
		rte_rawdev_obj_t queue_conf __rte_unused)
{
	return 0;
}
222 
/* Queue release: stub, nothing to free until queues are implemented. */
static int
ntb_queue_release(struct rte_rawdev *dev __rte_unused,
		  uint16_t queue_id __rte_unused)
{
	return 0;
}
229 
230 static uint16_t
231 ntb_queue_count(struct rte_rawdev *dev)
232 {
233 	struct ntb_hw *hw = dev->dev_private;
234 	return hw->queue_pairs;
235 }
236 
237 static int
238 ntb_enqueue_bufs(struct rte_rawdev *dev,
239 		 struct rte_rawdev_buf **buffers,
240 		 unsigned int count,
241 		 rte_rawdev_obj_t context)
242 {
243 	/* Not FIFO right now. Just for testing memory write. */
244 	struct ntb_hw *hw = dev->dev_private;
245 	unsigned int i;
246 	void *bar_addr;
247 	size_t size;
248 
249 	if (hw->ntb_ops->get_peer_mw_addr == NULL)
250 		return -ENOTSUP;
251 	bar_addr = (*hw->ntb_ops->get_peer_mw_addr)(dev, 0);
252 	size = (size_t)context;
253 
254 	for (i = 0; i < count; i++)
255 		rte_memcpy(bar_addr, buffers[i]->buf_addr, size);
256 	return 0;
257 }
258 
259 static int
260 ntb_dequeue_bufs(struct rte_rawdev *dev,
261 		 struct rte_rawdev_buf **buffers,
262 		 unsigned int count,
263 		 rte_rawdev_obj_t context)
264 {
265 	/* Not FIFO. Just for testing memory read. */
266 	struct ntb_hw *hw = dev->dev_private;
267 	unsigned int i;
268 	size_t size;
269 
270 	size = (size_t)context;
271 
272 	for (i = 0; i < count; i++)
273 		rte_memcpy(buffers[i]->buf_addr, hw->mz[i]->addr, size);
274 	return 0;
275 }
276 
277 static void
278 ntb_dev_info_get(struct rte_rawdev *dev, rte_rawdev_obj_t dev_info)
279 {
280 	struct ntb_hw *hw = dev->dev_private;
281 	struct ntb_attr *ntb_attrs = dev_info;
282 
283 	strncpy(ntb_attrs[NTB_TOPO_ID].name, NTB_TOPO_NAME, NTB_ATTR_NAME_LEN);
284 	switch (hw->topo) {
285 	case NTB_TOPO_B2B_DSD:
286 		strncpy(ntb_attrs[NTB_TOPO_ID].value, "B2B DSD",
287 			NTB_ATTR_VAL_LEN);
288 		break;
289 	case NTB_TOPO_B2B_USD:
290 		strncpy(ntb_attrs[NTB_TOPO_ID].value, "B2B USD",
291 			NTB_ATTR_VAL_LEN);
292 		break;
293 	default:
294 		strncpy(ntb_attrs[NTB_TOPO_ID].value, "Unsupported",
295 			NTB_ATTR_VAL_LEN);
296 	}
297 
298 	strncpy(ntb_attrs[NTB_LINK_STATUS_ID].name, NTB_LINK_STATUS_NAME,
299 		NTB_ATTR_NAME_LEN);
300 	snprintf(ntb_attrs[NTB_LINK_STATUS_ID].value, NTB_ATTR_VAL_LEN,
301 		 "%d", hw->link_status);
302 
303 	strncpy(ntb_attrs[NTB_SPEED_ID].name, NTB_SPEED_NAME,
304 		NTB_ATTR_NAME_LEN);
305 	snprintf(ntb_attrs[NTB_SPEED_ID].value, NTB_ATTR_VAL_LEN,
306 		 "%d", hw->link_speed);
307 
308 	strncpy(ntb_attrs[NTB_WIDTH_ID].name, NTB_WIDTH_NAME,
309 		NTB_ATTR_NAME_LEN);
310 	snprintf(ntb_attrs[NTB_WIDTH_ID].value, NTB_ATTR_VAL_LEN,
311 		 "%d", hw->link_width);
312 
313 	strncpy(ntb_attrs[NTB_MW_CNT_ID].name, NTB_MW_CNT_NAME,
314 		NTB_ATTR_NAME_LEN);
315 	snprintf(ntb_attrs[NTB_MW_CNT_ID].value, NTB_ATTR_VAL_LEN,
316 		 "%d", hw->mw_cnt);
317 
318 	strncpy(ntb_attrs[NTB_DB_CNT_ID].name, NTB_DB_CNT_NAME,
319 		NTB_ATTR_NAME_LEN);
320 	snprintf(ntb_attrs[NTB_DB_CNT_ID].value, NTB_ATTR_VAL_LEN,
321 		 "%d", hw->db_cnt);
322 
323 	strncpy(ntb_attrs[NTB_SPAD_CNT_ID].name, NTB_SPAD_CNT_NAME,
324 		NTB_ATTR_NAME_LEN);
325 	snprintf(ntb_attrs[NTB_SPAD_CNT_ID].value, NTB_ATTR_VAL_LEN,
326 		 "%d", hw->spad_cnt);
327 }
328 
/* Device configure: stub, nothing configurable yet. */
static int
ntb_dev_configure(const struct rte_rawdev *dev __rte_unused,
		  rte_rawdev_obj_t config __rte_unused)
{
	return 0;
}
335 
336 static int
337 ntb_dev_start(struct rte_rawdev *dev)
338 {
339 	struct ntb_hw *hw = dev->dev_private;
340 	int ret, i;
341 
342 	/* TODO: init queues and start queues. */
343 
344 	/* Map memory of bar_size to remote. */
345 	hw->mz = rte_zmalloc("struct rte_memzone *",
346 			     hw->mw_cnt * sizeof(struct rte_memzone *), 0);
347 	for (i = 0; i < hw->mw_cnt; i++) {
348 		ret = ntb_set_mw(dev, i, hw->mw_size[i]);
349 		if (ret) {
350 			NTB_LOG(ERR, "Fail to set mw.");
351 			return ret;
352 		}
353 	}
354 
355 	dev->started = 1;
356 
357 	return 0;
358 }
359 
360 static void
361 ntb_dev_stop(struct rte_rawdev *dev)
362 {
363 	struct ntb_hw *hw = dev->dev_private;
364 	uint32_t time_out;
365 	int status;
366 
367 	/* TODO: stop rx/tx queues. */
368 
369 	if (!hw->peer_dev_up)
370 		goto clean;
371 
372 	ntb_link_cleanup(dev);
373 
374 	/* Notify the peer that device will be down. */
375 	if (hw->ntb_ops->peer_db_set == NULL) {
376 		NTB_LOG(ERR, "Peer doorbell setting is not supported.");
377 		return;
378 	}
379 	status = (*hw->ntb_ops->peer_db_set)(dev, 1);
380 	if (status) {
381 		NTB_LOG(ERR, "Failed to tell peer device is down.");
382 		return;
383 	}
384 
385 	/*
386 	 * Set time out as 1s in case that the peer is stopped accidently
387 	 * without any notification.
388 	 */
389 	time_out = 1000000;
390 
391 	/* Wait for cleanup work down before db mask clear. */
392 	while (hw->peer_dev_up && time_out) {
393 		time_out -= 10;
394 		rte_delay_us(10);
395 	}
396 
397 clean:
398 	/* Clear doorbells mask. */
399 	if (hw->ntb_ops->db_set_mask == NULL) {
400 		NTB_LOG(ERR, "Doorbell mask setting is not supported.");
401 		return;
402 	}
403 	status = (*hw->ntb_ops->db_set_mask)(dev,
404 				(((uint64_t)1 << hw->db_cnt) - 1));
405 	if (status)
406 		NTB_LOG(ERR, "Failed to clear doorbells.");
407 
408 	dev->started = 0;
409 }
410 
411 static int
412 ntb_dev_close(struct rte_rawdev *dev)
413 {
414 	struct ntb_hw *hw = dev->dev_private;
415 	struct rte_intr_handle *intr_handle;
416 	int ret = 0;
417 
418 	if (dev->started)
419 		ntb_dev_stop(dev);
420 
421 	/* TODO: free queues. */
422 
423 	intr_handle = &hw->pci_dev->intr_handle;
424 	/* Clean datapath event and vec mapping */
425 	rte_intr_efd_disable(intr_handle);
426 	if (intr_handle->intr_vec) {
427 		rte_free(intr_handle->intr_vec);
428 		intr_handle->intr_vec = NULL;
429 	}
430 	/* Disable uio intr before callback unregister */
431 	rte_intr_disable(intr_handle);
432 
433 	/* Unregister callback func to eal lib */
434 	rte_intr_callback_unregister(intr_handle,
435 				     ntb_dev_intr_handler, dev);
436 
437 	return ret;
438 }
439 
/* Device reset: stub, nothing to reset yet. */
static int
ntb_dev_reset(struct rte_rawdev *rawdev __rte_unused)
{
	return 0;
}
445 
446 static int
447 ntb_attr_set(struct rte_rawdev *dev, const char *attr_name,
448 				 uint64_t attr_value)
449 {
450 	struct ntb_hw *hw;
451 	int index;
452 
453 	if (dev == NULL || attr_name == NULL) {
454 		NTB_LOG(ERR, "Invalid arguments for setting attributes");
455 		return -EINVAL;
456 	}
457 
458 	hw = dev->dev_private;
459 
460 	if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
461 		if (hw->ntb_ops->spad_write == NULL)
462 			return -ENOTSUP;
463 		index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
464 		(*hw->ntb_ops->spad_write)(dev, hw->spad_user_list[index],
465 					   1, attr_value);
466 		NTB_LOG(INFO, "Set attribute (%s) Value (%" PRIu64 ")",
467 			attr_name, attr_value);
468 		return 0;
469 	}
470 
471 	/* Attribute not found. */
472 	NTB_LOG(ERR, "Attribute not found.");
473 	return -EINVAL;
474 }
475 
476 static int
477 ntb_attr_get(struct rte_rawdev *dev, const char *attr_name,
478 				 uint64_t *attr_value)
479 {
480 	struct ntb_hw *hw;
481 	int index;
482 
483 	if (dev == NULL || attr_name == NULL || attr_value == NULL) {
484 		NTB_LOG(ERR, "Invalid arguments for getting attributes");
485 		return -EINVAL;
486 	}
487 
488 	hw = dev->dev_private;
489 
490 	if (!strncmp(attr_name, NTB_TOPO_NAME, NTB_ATTR_NAME_LEN)) {
491 		*attr_value = hw->topo;
492 		NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
493 			attr_name, *attr_value);
494 		return 0;
495 	}
496 
497 	if (!strncmp(attr_name, NTB_LINK_STATUS_NAME, NTB_ATTR_NAME_LEN)) {
498 		*attr_value = hw->link_status;
499 		NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
500 			attr_name, *attr_value);
501 		return 0;
502 	}
503 
504 	if (!strncmp(attr_name, NTB_SPEED_NAME, NTB_ATTR_NAME_LEN)) {
505 		*attr_value = hw->link_speed;
506 		NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
507 			attr_name, *attr_value);
508 		return 0;
509 	}
510 
511 	if (!strncmp(attr_name, NTB_WIDTH_NAME, NTB_ATTR_NAME_LEN)) {
512 		*attr_value = hw->link_width;
513 		NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
514 			attr_name, *attr_value);
515 		return 0;
516 	}
517 
518 	if (!strncmp(attr_name, NTB_MW_CNT_NAME, NTB_ATTR_NAME_LEN)) {
519 		*attr_value = hw->mw_cnt;
520 		NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
521 			attr_name, *attr_value);
522 		return 0;
523 	}
524 
525 	if (!strncmp(attr_name, NTB_DB_CNT_NAME, NTB_ATTR_NAME_LEN)) {
526 		*attr_value = hw->db_cnt;
527 		NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
528 			attr_name, *attr_value);
529 		return 0;
530 	}
531 
532 	if (!strncmp(attr_name, NTB_SPAD_CNT_NAME, NTB_ATTR_NAME_LEN)) {
533 		*attr_value = hw->spad_cnt;
534 		NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
535 			attr_name, *attr_value);
536 		return 0;
537 	}
538 
539 	if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
540 		if (hw->ntb_ops->spad_read == NULL)
541 			return -ENOTSUP;
542 		index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
543 		*attr_value = (*hw->ntb_ops->spad_read)(dev,
544 				hw->spad_user_list[index], 0);
545 		NTB_LOG(INFO, "Attribute (%s) Value (%" PRIu64 ")",
546 			attr_name, *attr_value);
547 		return 0;
548 	}
549 
550 	/* Attribute not found. */
551 	NTB_LOG(ERR, "Attribute not found.");
552 	return -EINVAL;
553 }
554 
/* xstats get: stub, no statistics implemented yet. */
static int
ntb_xstats_get(const struct rte_rawdev *dev __rte_unused,
	       const unsigned int ids[] __rte_unused,
	       uint64_t values[] __rte_unused,
	       unsigned int n __rte_unused)
{
	return 0;
}
563 
/* xstats name query: stub, no statistics implemented yet. */
static int
ntb_xstats_get_names(const struct rte_rawdev *dev __rte_unused,
		     struct rte_rawdev_xstats_name *xstats_names __rte_unused,
		     unsigned int size __rte_unused)
{
	return 0;
}
571 
/* xstats lookup by name: stub, no statistics implemented yet. */
static uint64_t
ntb_xstats_get_by_name(const struct rte_rawdev *dev __rte_unused,
		       const char *name __rte_unused,
		       unsigned int *id __rte_unused)
{
	return 0;
}
579 
/* xstats reset: stub, no statistics implemented yet. */
static int
ntb_xstats_reset(struct rte_rawdev *dev __rte_unused,
		 const uint32_t ids[] __rte_unused,
		 uint32_t nb_ids __rte_unused)
{
	return 0;
}
587 
/* Rawdev ops table installed on every NTB rawdev in ntb_create(). */
static const struct rte_rawdev_ops ntb_ops = {
	.dev_info_get         = ntb_dev_info_get,
	.dev_configure        = ntb_dev_configure,
	.dev_start            = ntb_dev_start,
	.dev_stop             = ntb_dev_stop,
	.dev_close            = ntb_dev_close,
	.dev_reset            = ntb_dev_reset,

	.queue_def_conf       = ntb_queue_conf_get,
	.queue_setup          = ntb_queue_setup,
	.queue_release        = ntb_queue_release,
	.queue_count          = ntb_queue_count,

	.enqueue_bufs         = ntb_enqueue_bufs,
	.dequeue_bufs         = ntb_dequeue_bufs,

	.attr_get             = ntb_attr_get,
	.attr_set             = ntb_attr_set,

	.xstats_get           = ntb_xstats_get,
	.xstats_get_names     = ntb_xstats_get_names,
	.xstats_get_by_name   = ntb_xstats_get_by_name,
	.xstats_reset         = ntb_xstats_reset,
};
612 
/*
 * One-time hardware init for a freshly allocated rawdev: select the
 * ops table by PCI device id, init the device and bring the link up,
 * wire up doorbell interrupts, then advertise the local mw count and
 * sizes to the peer over scratchpads and ring doorbell 0.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int
ntb_init_hw(struct rte_rawdev *dev, struct rte_pci_device *pci_dev)
{
	struct ntb_hw *hw = dev->dev_private;
	struct rte_intr_handle *intr_handle;
	uint32_t val;
	int ret, i;

	hw->pci_dev = pci_dev;
	hw->peer_dev_up = 0;
	hw->link_status = NTB_LINK_DOWN;
	hw->link_speed = NTB_SPEED_NONE;
	hw->link_width = NTB_WIDTH_NONE;

	/* Only Intel Skylake B2B NTB is supported for now. */
	switch (pci_dev->id.device_id) {
	case NTB_INTEL_DEV_ID_B2B_SKX:
		hw->ntb_ops = &intel_ntb_ops;
		break;
	default:
		NTB_LOG(ERR, "Not supported device.");
		return -EINVAL;
	}

	if (hw->ntb_ops->ntb_dev_init == NULL)
		return -ENOTSUP;
	ret = (*hw->ntb_ops->ntb_dev_init)(dev);
	if (ret) {
		NTB_LOG(ERR, "Unable to init ntb dev.");
		return ret;
	}

	if (hw->ntb_ops->set_link == NULL)
		return -ENOTSUP;
	ret = (*hw->ntb_ops->set_link)(dev, 1);
	if (ret)
		return ret;

	/* Init doorbell. */
	hw->db_valid_mask = RTE_LEN2MASK(hw->db_cnt, uint64_t);

	intr_handle = &pci_dev->intr_handle;
	/* Register callback func to eal lib */
	rte_intr_callback_register(intr_handle,
				   ntb_dev_intr_handler, dev);

	/* NOTE(review): error returns from here on leave the intr callback
	 * registered — confirm whether unregister-on-failure is needed.
	 */
	ret = rte_intr_efd_enable(intr_handle, hw->db_cnt);
	if (ret)
		return ret;

	/* To clarify, the interrupt for each doorbell is already mapped
	 * by default for intel gen3. They are mapped to msix vec 1-32,
	 * and hardware intr is mapped to 0. Map all to 0 for uio.
	 */
	if (!rte_intr_cap_multiple(intr_handle)) {
		for (i = 0; i < hw->db_cnt; i++) {
			if (hw->ntb_ops->vector_bind == NULL)
				return -ENOTSUP;
			ret = (*hw->ntb_ops->vector_bind)(dev, i, 0);
			if (ret)
				return ret;
		}
	}

	if (hw->ntb_ops->db_set_mask == NULL ||
	    hw->ntb_ops->peer_db_set == NULL) {
		NTB_LOG(ERR, "Doorbell is not supported.");
		return -ENOTSUP;
	}
	/* Mask of 0 unmasks (enables) every doorbell interrupt. */
	hw->db_mask = 0;
	ret = (*hw->ntb_ops->db_set_mask)(dev, hw->db_mask);
	if (ret) {
		NTB_LOG(ERR, "Unable to enable intr for all dbs.");
		return ret;
	}

	/* enable uio intr after callback register */
	rte_intr_enable(intr_handle);

	if (hw->ntb_ops->spad_write == NULL) {
		NTB_LOG(ERR, "Scratchpad is not supported.");
		return -ENOTSUP;
	}
	/* Tell peer the mw_cnt of local side. */
	ret = (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_MWS, 1, hw->mw_cnt);
	if (ret) {
		NTB_LOG(ERR, "Failed to tell peer mw count.");
		return ret;
	}

	/* Tell peer each mw size on local side. */
	for (i = 0; i < hw->mw_cnt; i++) {
		NTB_LOG(DEBUG, "Local %u mw size: 0x%"PRIx64"", i,
				hw->mw_size[i]);
		/* High and low 32 bits go to separate scratchpads. */
		val = hw->mw_size[i] >> 32;
		ret = (*hw->ntb_ops->spad_write)
				(dev, SPAD_MW0_SZ_H + 2 * i, 1, val);
		if (ret) {
			NTB_LOG(ERR, "Failed to tell peer mw size.");
			return ret;
		}

		val = hw->mw_size[i];
		ret = (*hw->ntb_ops->spad_write)
				(dev, SPAD_MW0_SZ_L + 2 * i, 1, val);
		if (ret) {
			NTB_LOG(ERR, "Failed to tell peer mw size.");
			return ret;
		}
	}

	/* Ring doorbell 0 to tell peer the device is ready. */
	ret = (*hw->ntb_ops->peer_db_set)(dev, 0);
	if (ret) {
		NTB_LOG(ERR, "Failed to tell peer device is probed.");
		return ret;
	}

	return ret;
}
732 
733 static int
734 ntb_create(struct rte_pci_device *pci_dev, int socket_id)
735 {
736 	char name[RTE_RAWDEV_NAME_MAX_LEN];
737 	struct rte_rawdev *rawdev = NULL;
738 	int ret;
739 
740 	if (pci_dev == NULL) {
741 		NTB_LOG(ERR, "Invalid pci_dev.");
742 		return -EINVAL;
743 	}
744 
745 	memset(name, 0, sizeof(name));
746 	snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
747 		 pci_dev->addr.bus, pci_dev->addr.devid,
748 		 pci_dev->addr.function);
749 
750 	NTB_LOG(INFO, "Init %s on NUMA node %d", name, socket_id);
751 
752 	/* Allocate device structure. */
753 	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct ntb_hw),
754 					 socket_id);
755 	if (rawdev == NULL) {
756 		NTB_LOG(ERR, "Unable to allocate rawdev.");
757 		return -EINVAL;
758 	}
759 
760 	rawdev->dev_ops = &ntb_ops;
761 	rawdev->device = &pci_dev->device;
762 	rawdev->driver_name = pci_dev->driver->driver.name;
763 
764 	ret = ntb_init_hw(rawdev, pci_dev);
765 	if (ret < 0) {
766 		NTB_LOG(ERR, "Unable to init ntb hw.");
767 		goto fail;
768 	}
769 
770 	return ret;
771 
772 fail:
773 	if (rawdev != NULL)
774 		rte_rawdev_pmd_release(rawdev);
775 
776 	return ret;
777 }
778 
779 static int
780 ntb_destroy(struct rte_pci_device *pci_dev)
781 {
782 	char name[RTE_RAWDEV_NAME_MAX_LEN];
783 	struct rte_rawdev *rawdev;
784 	int ret;
785 
786 	if (pci_dev == NULL) {
787 		NTB_LOG(ERR, "Invalid pci_dev.");
788 		ret = -EINVAL;
789 		return ret;
790 	}
791 
792 	memset(name, 0, sizeof(name));
793 	snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
794 		 pci_dev->addr.bus, pci_dev->addr.devid,
795 		 pci_dev->addr.function);
796 
797 	NTB_LOG(INFO, "Closing %s on NUMA node %d", name, rte_socket_id());
798 
799 	rawdev = rte_rawdev_pmd_get_named_dev(name);
800 	if (rawdev == NULL) {
801 		NTB_LOG(ERR, "Invalid device name (%s)", name);
802 		ret = -EINVAL;
803 		return ret;
804 	}
805 
806 	ret = rte_rawdev_pmd_release(rawdev);
807 	if (ret)
808 		NTB_LOG(ERR, "Failed to destroy ntb rawdev.");
809 
810 	return ret;
811 }
812 
/* PCI probe hook: create a rawdev on the probing core's NUMA node. */
static int
ntb_probe(struct rte_pci_driver *pci_drv __rte_unused,
	struct rte_pci_device *pci_dev)
{
	return ntb_create(pci_dev, rte_socket_id());
}
819 
/* PCI remove hook: release the rawdev backing this PCI device. */
static int
ntb_remove(struct rte_pci_device *pci_dev)
{
	return ntb_destroy(pci_dev);
}
825 
826 
/* PCI driver descriptor; requires BAR mapping for mw/doorbell access. */
static struct rte_pci_driver rte_ntb_pmd = {
	.id_table = pci_id_ntb_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
	.probe = ntb_probe,
	.remove = ntb_remove,
};

/* Register the PMD with the PCI bus, export its id table and the
 * kernel modules it can bind against.
 */
RTE_PMD_REGISTER_PCI(raw_ntb, rte_ntb_pmd);
RTE_PMD_REGISTER_PCI_TABLE(raw_ntb, pci_id_ntb_map);
RTE_PMD_REGISTER_KMOD_DEP(raw_ntb, "* igb_uio | uio_pci_generic | vfio-pci");
837 
/* Constructor: register the driver log type and default to DEBUG level. */
RTE_INIT(ntb_init_log)
{
	ntb_logtype = rte_log_register("pmd.raw.ntb");
	if (ntb_logtype >= 0)
		rte_log_set_level(ntb_logtype, RTE_LOG_DEBUG);
}
844