xref: /dpdk/drivers/vdpa/mlx5/mlx5_vdpa.c (revision e12a0166c80f65e35408f4715b2f3a60763c3741)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 #include <unistd.h>
5 #include <net/if.h>
6 #include <sys/socket.h>
7 #include <sys/ioctl.h>
8 #include <sys/mman.h>
9 #include <fcntl.h>
10 #include <netinet/in.h>
11 
12 #include <rte_malloc.h>
13 #include <rte_log.h>
14 #include <rte_errno.h>
15 #include <rte_string_fns.h>
16 #include <bus_pci_driver.h>
17 #include <rte_eal_paging.h>
18 
19 #include <mlx5_glue.h>
20 #include <mlx5_common.h>
21 #include <mlx5_common_defs.h>
22 #include <mlx5_devx_cmds.h>
23 #include <mlx5_prm.h>
24 #include <mlx5_nl.h>
25 
26 #include "mlx5_vdpa_utils.h"
27 #include "mlx5_vdpa.h"
28 
29 #define MLX5_VDPA_DRIVER_NAME vdpa_mlx5
30 
31 #define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
32 			    (1ULL << VIRTIO_F_ANY_LAYOUT) | \
33 			    (1ULL << VIRTIO_NET_F_MQ) | \
34 			    (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
35 			    (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
36 			    (1ULL << VHOST_F_LOG_ALL) | \
37 			    (1ULL << VIRTIO_NET_F_MTU))
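/*
 * Baseline feature bits always advertised to vhost. Capability-dependent
 * bits (packed ring, TSO, checksum offloads, VERSION_1) are ORed in by
 * mlx5_vdpa_get_vdpa_features() below according to priv->caps.
 */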
38 
39 #define MLX5_VDPA_PROTOCOL_FEATURES \
40 			    ((1ULL << VHOST_USER_PROTOCOL_F_BACKEND_REQ) | \
41 			     (1ULL << VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD) | \
42 			     (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
43 			     (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
44 			     (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
45 			     (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
46 			     (1ULL << VHOST_USER_PROTOCOL_F_STATUS))
47 
48 #define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU
49 
50 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
51 					      TAILQ_HEAD_INITIALIZER(priv_list);
52 static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
53 
54 struct mlx5_vdpa_conf_thread_mng conf_thread_mng;
55 
56 static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv);
57 
58 static struct mlx5_vdpa_priv *
59 mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
60 {
61 	struct mlx5_vdpa_priv *priv;
62 	int found = 0;
63 
64 	pthread_mutex_lock(&priv_list_lock);
65 	TAILQ_FOREACH(priv, &priv_list, next) {
66 		if (vdev == priv->vdev) {
67 			found = 1;
68 			break;
69 		}
70 	}
71 	pthread_mutex_unlock(&priv_list_lock);
72 	if (!found) {
73 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
74 		rte_errno = EINVAL;
75 		return NULL;
76 	}
77 	return priv;
78 }
79 
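/*
 * Vhost expects the number of queue pairs here, while the HCA capability
 * max_num_virtio_queues counts individual virtqueues, hence the divide by 2.
 */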
80 static int
81 mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
82 {
83 	struct mlx5_vdpa_priv *priv =
84 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
85 
86 	if (priv == NULL) {
87 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
88 		return -1;
89 	}
90 	*queue_num = priv->caps.max_num_virtio_queues / 2;
91 	return 0;
92 }
93 
94 static int
95 mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
96 {
97 	struct mlx5_vdpa_priv *priv =
98 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
99 
100 	if (priv == NULL) {
101 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
102 		return -1;
103 	}
104 	*features = MLX5_VDPA_DEFAULT_FEATURES;
105 	if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
106 		*features |= (1ULL << VIRTIO_F_RING_PACKED);
107 	if (priv->caps.tso_ipv4)
108 		*features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
109 	if (priv->caps.tso_ipv6)
110 		*features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
111 	if (priv->caps.tx_csum)
112 		*features |= (1ULL << VIRTIO_NET_F_CSUM);
113 	if (priv->caps.rx_csum)
114 		*features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
115 	if (priv->caps.virtio_version_1_0)
116 		*features |= (1ULL << VIRTIO_F_VERSION_1);
117 	return 0;
118 }
119 
120 static int
121 mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
122 		uint64_t *features)
123 {
124 	struct mlx5_vdpa_priv *priv =
125 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
126 
127 	if (priv == NULL) {
128 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
129 		return -1;
130 	}
131 	*features = MLX5_VDPA_PROTOCOL_FEATURES;
132 	return 0;
133 }
134 
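/*
 * Vring state change callback from vhost. The per-virtq lock serializes the
 * enable/disable against other operations on the same virtq.
 */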
135 static int
136 mlx5_vdpa_set_vring_state(int vid, int vring, int state)
137 {
138 	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
139 	struct mlx5_vdpa_priv *priv =
140 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
141 	struct mlx5_vdpa_virtq *virtq;
142 	int ret;
143 
144 	if (priv == NULL) {
145 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
146 		return -EINVAL;
147 	}
148 	if (vring >= (int)priv->caps.max_num_virtio_queues) {
149 		DRV_LOG(ERR, "Too big vring id: %d.", vring);
150 		return -E2BIG;
151 	}
152 	virtq = &priv->virtqs[vring];
153 	pthread_mutex_lock(&virtq->virtq_lock);
154 	ret = mlx5_vdpa_virtq_enable(priv, vring, state);
155 	pthread_mutex_unlock(&virtq->virtq_lock);
156 	return ret;
157 }
158 
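/*
 * Applies the negotiated vhost features; when live-migration logging is
 * requested, the dirty bitmap is registered and HW dirty-page logging is
 * turned on.
 */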
159 static int
160 mlx5_vdpa_features_set(int vid)
161 {
162 	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
163 	struct mlx5_vdpa_priv *priv =
164 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
165 	uint64_t log_base, log_size;
166 	uint64_t features;
167 	int ret;
168 
169 	if (priv == NULL) {
170 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
171 		return -EINVAL;
172 	}
173 	ret = rte_vhost_get_negotiated_features(vid, &features);
174 	if (ret) {
175 		DRV_LOG(ERR, "Failed to get negotiated features.");
176 		return ret;
177 	}
178 	if (RTE_VHOST_NEED_LOG(features)) {
179 		ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
180 		if (ret) {
181 			DRV_LOG(ERR, "Failed to get log base.");
182 			return ret;
183 		}
184 		ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
185 		if (ret) {
186 			DRV_LOG(ERR, "Failed to set dirty bitmap.");
187 			return ret;
188 		}
189 		DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
190 		ret = mlx5_vdpa_logging_enable(priv, 1);
191 		if (ret) {
192 			DRV_LOG(ERR, "Failed to enable dirty logging.");
193 			return ret;
194 		}
195 	}
196 	return 0;
197 }
198 
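/*
 * Best-effort propagation of the vhost-negotiated MTU to the kernel netdev
 * backing this device, using SIOCGIFMTU/SIOCSIFMTU with a bounded retry loop.
 */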
199 static int
200 mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
201 {
202 	struct ifreq request;
203 	uint16_t vhost_mtu = 0;
204 	uint16_t kern_mtu = 0;
205 	int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
206 	int sock;
207 	int retries = MLX5_VDPA_MAX_RETRIES;
208 
209 	if (ret) {
210 		DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
211 		return ret;
212 	}
213 	if (!vhost_mtu) {
214 		DRV_LOG(DEBUG, "Vhost MTU is 0.");
215 		return ret;
216 	}
217 	ret = mlx5_get_ifname_sysfs
218 				(mlx5_os_get_ctx_device_name(priv->cdev->ctx),
219 				 request.ifr_name);
220 	if (ret) {
221 		DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
222 		return ret;
223 	}
224 	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
225 	if (sock == -1) {
226 		DRV_LOG(DEBUG, "Cannot open IF socket.");
227 		return sock;
228 	}
229 	while (retries--) {
230 		ret = ioctl(sock, SIOCGIFMTU, &request);
231 		if (ret == -1)
232 			break;
233 		kern_mtu = request.ifr_mtu;
234 		DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
235 			(int)vhost_mtu);
236 		if (kern_mtu == vhost_mtu)
237 			break;
238 		request.ifr_mtu = vhost_mtu;
239 		ret = ioctl(sock, SIOCSIFMTU, &request);
240 		if (ret == -1)
241 			break;
242 		request.ifr_mtu = 0;
243 		usleep(MLX5_VDPA_USEC);
244 	}
245 	close(sock);
246 	return kern_mtu == vhost_mtu ? 0 : -1;
247 }
248 
249 void
250 mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv)
251 {
252 	/* Clean pre-created resource in dev removal only. */
253 	if (!priv->queues)
254 		mlx5_vdpa_virtqs_cleanup(priv);
255 	mlx5_vdpa_mem_dereg(priv);
256 }
257 
258 static bool
259 mlx5_vdpa_wait_dev_close_tasks_done(struct mlx5_vdpa_priv *priv)
260 {
261 	uint32_t timeout = 0;
262 
263 	/* Check and wait all close tasks done. */
264 	while (rte_atomic_load_explicit(&priv->dev_close_progress,
265 		rte_memory_order_relaxed) != 0 && timeout < 1000) {
266 		rte_delay_us_sleep(10000);
267 		timeout++;
268 	}
269 	if (priv->dev_close_progress) {
270 		DRV_LOG(ERR,
271 		"Failed to wait for device close tasks to finish, vid %d.",
272 		priv->vid);
273 		return true;
274 	}
275 	return false;
276 }
277 
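/*
 * Device close has two paths: when the configuration thread pool is enabled
 * (and this is not a final resource release), the close is queued as a
 * MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT task so the caller is not blocked;
 * otherwise the teardown (steering, virtqs, CQ drain, dirty-log MR) runs
 * inline below the single_thrd label.
 */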
278 static int
279 _internal_mlx5_vdpa_dev_close(struct mlx5_vdpa_priv *priv,
280 		bool release_resource)
281 {
282 	int ret = 0;
283 	int vid = priv->vid;
284 
285 	mlx5_vdpa_virtq_unreg_intr_handle_all(priv);
286 	mlx5_vdpa_cqe_event_unset(priv);
287 	if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
288 		ret |= mlx5_vdpa_lm_log(priv);
289 		priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
290 	}
291 	if (priv->use_c_thread && !release_resource) {
292 		if (priv->last_c_thrd_idx >=
293 			(conf_thread_mng.max_thrds - 1))
294 			priv->last_c_thrd_idx = 0;
295 		else
296 			priv->last_c_thrd_idx++;
297 		rte_atomic_store_explicit(&priv->dev_close_progress,
298 			1, rte_memory_order_relaxed);
299 		if (mlx5_vdpa_task_add(priv,
300 			priv->last_c_thrd_idx,
301 			MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT,
302 			NULL, NULL, NULL, 1)) {
303 			DRV_LOG(ERR,
304 			"Failed to add dev close task.");
305 			goto single_thrd;
306 		}
307 		priv->state = MLX5_VDPA_STATE_PROBED;
308 		DRV_LOG(INFO, "vDPA device %d was closed.", vid);
309 		return ret;
310 	}
311 single_thrd:
312 	pthread_mutex_lock(&priv->steer_update_lock);
313 	mlx5_vdpa_steer_unset(priv);
314 	pthread_mutex_unlock(&priv->steer_update_lock);
315 	mlx5_vdpa_virtqs_release(priv, release_resource);
316 	mlx5_vdpa_drain_cq(priv);
317 	if (priv->lm_mr.addr)
318 		mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
319 	if (!priv->connected)
320 		mlx5_vdpa_dev_cache_clean(priv);
321 	priv->vid = 0;
322 	rte_atomic_store_explicit(&priv->dev_close_progress, 0,
323 		rte_memory_order_relaxed);
324 	priv->state = MLX5_VDPA_STATE_PROBED;
325 	DRV_LOG(INFO, "vDPA device %d was closed.", vid);
326 	return ret;
327 }
328 
329 static int
330 mlx5_vdpa_dev_close(int vid)
331 {
332 	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
333 	struct mlx5_vdpa_priv *priv;
334 
335 	if (!vdev) {
336 		DRV_LOG(ERR, "Invalid vDPA device.");
337 		return -1;
338 	}
339 	priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
340 	if (priv == NULL) {
341 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
342 		return -1;
343 	}
344 	return _internal_mlx5_vdpa_dev_close(priv, false);
345 }
346 
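/*
 * dev_conf callback: re-closes a previously configured device, waits for any
 * in-flight close tasks, then registers guest memory, prepares virtqs and
 * steering, and starts CQE event handling before marking the device
 * configured.
 */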
347 static int
348 mlx5_vdpa_dev_config(int vid)
349 {
350 	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
351 	struct mlx5_vdpa_priv *priv =
352 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
353 
354 	if (priv == NULL) {
355 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
356 		return -EINVAL;
357 	}
358 	if (priv->state == MLX5_VDPA_STATE_CONFIGURED &&
359 	    mlx5_vdpa_dev_close(vid)) {
360 		DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
361 		return -1;
362 	}
363 	if (mlx5_vdpa_wait_dev_close_tasks_done(priv))
364 		return -1;
365 	priv->vid = vid;
366 	priv->connected = true;
367 	if (mlx5_vdpa_mtu_set(priv))
368 		DRV_LOG(WARNING, "MTU cannot be set on device %s.",
369 				vdev->device->name);
370 	if (mlx5_vdpa_mem_register(priv) ||
371 	    mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
372 	    mlx5_vdpa_cqe_event_setup(priv)) {
373 		mlx5_vdpa_dev_close(vid);
374 		return -1;
375 	}
376 	priv->state = MLX5_VDPA_STATE_CONFIGURED;
377 	DRV_LOG(INFO, "vDPA device %d was configured.", vid);
378 	return 0;
379 }
380 
381 static int
382 mlx5_vdpa_get_device_fd(int vid)
383 {
384 	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
385 	struct mlx5_vdpa_priv *priv =
386 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
387 
388 	if (priv == NULL) {
389 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
390 		return -EINVAL;
391 	}
392 	return ((struct ibv_context *)priv->cdev->ctx)->cmd_fd;
393 }
394 
395 static int
396 mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
397 {
398 	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
399 	struct mlx5_vdpa_priv *priv =
400 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
401 
402 	RTE_SET_USED(qid);
403 	if (priv == NULL) {
404 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
405 		return -EINVAL;
406 	}
407 	if (!priv->var) {
408 		DRV_LOG(ERR, "VAR was not created for device %s, is the device"
409 			" configured?", vdev->device->name);
410 		return -EINVAL;
411 	}
412 	*offset = priv->var->mmap_off;
413 	*size = priv->var->length;
414 	return 0;
415 }
416 
417 static int
418 mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
419 		struct rte_vdpa_stat_name *stats_names,
420 		unsigned int size)
421 {
422 	static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
423 		"received_descriptors",
424 		"completed_descriptors",
425 		"bad descriptor errors",
426 		"exceed max chain",
427 		"invalid buffer",
428 		"completion errors",
429 	};
430 	struct mlx5_vdpa_priv *priv =
431 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
432 	unsigned int i;
433 
434 	if (priv == NULL) {
435 		DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
436 		return -ENODEV;
437 	}
438 	if (!stats_names)
439 		return MLX5_VDPA_STATS_MAX;
440 	size = RTE_MIN(size, (unsigned int)MLX5_VDPA_STATS_MAX);
441 	for (i = 0; i < size; ++i)
442 		strlcpy(stats_names[i].name, mlx5_vdpa_stats_names[i],
443 			RTE_VDPA_STATS_NAME_SIZE);
444 	return size;
445 }
446 
447 static int
448 mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
449 		struct rte_vdpa_stat *stats, unsigned int n)
450 {
451 	struct mlx5_vdpa_priv *priv =
452 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
453 
454 	if (priv == NULL) {
455 		DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
456 		return -ENODEV;
457 	}
458 	if (qid >= (int)priv->caps.max_num_virtio_queues) {
459 		DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
460 				vdev->device->name);
461 		return -E2BIG;
462 	}
463 	if (!priv->caps.queue_counters_valid) {
464 		DRV_LOG(ERR, "Virtq statistics are not supported for device %s.",
465 			vdev->device->name);
466 		return -ENOTSUP;
467 	}
468 	return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
469 }
470 
471 static int
472 mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
473 {
474 	struct mlx5_vdpa_priv *priv =
475 		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
476 
477 	if (priv == NULL) {
478 		DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
479 		return -ENODEV;
480 	}
481 	if (qid >= (int)priv->caps.max_num_virtio_queues) {
482 		DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
483 				vdev->device->name);
484 		return -E2BIG;
485 	}
486 	if (!priv->caps.queue_counters_valid) {
487 		DRV_LOG(ERR, "Virtq statistics are not supported for device %s.",
488 			vdev->device->name);
489 		return -ENOTSUP;
490 	}
491 	return mlx5_vdpa_virtq_stats_reset(priv, qid);
492 }
493 
494 static int
495 mlx5_vdpa_dev_cleanup(int vid)
496 {
497 	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
498 	struct mlx5_vdpa_priv *priv;
499 
500 	if (vdev == NULL)
501 		return -1;
502 	priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
503 	if (priv == NULL) {
504 		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
505 		return -1;
506 	}
507 	if (priv->state == MLX5_VDPA_STATE_PROBED) {
508 		if (priv->use_c_thread)
509 			mlx5_vdpa_wait_dev_close_tasks_done(priv);
510 		mlx5_vdpa_dev_cache_clean(priv);
511 	}
512 	priv->connected = false;
513 	return 0;
514 }
515 
516 static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
517 	.get_queue_num = mlx5_vdpa_get_queue_num,
518 	.get_features = mlx5_vdpa_get_vdpa_features,
519 	.get_protocol_features = mlx5_vdpa_get_protocol_features,
520 	.dev_conf = mlx5_vdpa_dev_config,
521 	.dev_close = mlx5_vdpa_dev_close,
522 	.dev_cleanup = mlx5_vdpa_dev_cleanup,
523 	.set_vring_state = mlx5_vdpa_set_vring_state,
524 	.set_features = mlx5_vdpa_features_set,
525 	.migration_done = NULL,
526 	.get_vfio_group_fd = NULL,
527 	.get_vfio_device_fd = mlx5_vdpa_get_device_fd,
528 	.get_notify_area = mlx5_vdpa_get_notify_area,
529 	.get_stats_names = mlx5_vdpa_get_stats_names,
530 	.get_stats = mlx5_vdpa_get_stats,
531 	.reset_stats = mlx5_vdpa_reset_stats,
532 };
533 
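/*
 * Devargs handler. An illustrative (hypothetical) probe string combining the
 * keys parsed below could look like:
 *   -a <PCI_BDF>,class=vdpa,event_mode=2,event_core=3,queues=8,queue_size=256
 * Values go through strtoul(val, NULL, 0), so decimal and 0x-prefixed hex are
 * both accepted.
 */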
534 static int
535 mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
536 {
537 	struct mlx5_vdpa_priv *priv = opaque;
538 	unsigned long tmp;
539 	int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
540 
541 	errno = 0;
542 	tmp = strtoul(val, NULL, 0);
543 	if (errno) {
544 		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
545 		return -errno;
546 	}
547 	if (strcmp(key, "event_mode") == 0) {
548 		if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
549 			priv->event_mode = (int)tmp;
550 		else
551 			DRV_LOG(WARNING, "Invalid event_mode %s.", val);
552 	} else if (strcmp(key, "event_us") == 0) {
553 		priv->event_us = (uint32_t)tmp;
554 	} else if (strcmp(key, "no_traffic_time") == 0) {
555 		priv->no_traffic_max = (uint32_t)tmp;
556 	} else if (strcmp(key, "event_core") == 0) {
557 		if (tmp >= (unsigned long)n_cores)
558 			DRV_LOG(WARNING, "Invalid event_core %s.", val);
559 		else
560 			priv->event_core = tmp;
561 	} else if (strcmp(key, "max_conf_threads") == 0) {
562 		if (tmp) {
563 			priv->use_c_thread = true;
564 			if (!conf_thread_mng.initializer_priv) {
565 				conf_thread_mng.initializer_priv = priv;
566 				if (tmp > MLX5_VDPA_MAX_C_THRD) {
567 					DRV_LOG(WARNING,
568 				"Invalid max_conf_threads %s, "
569 				"capping max_conf_threads to %d.",
570 				val, MLX5_VDPA_MAX_C_THRD);
571 					tmp = MLX5_VDPA_MAX_C_THRD;
572 				}
573 				conf_thread_mng.max_thrds = tmp;
574 			} else if (tmp != conf_thread_mng.max_thrds) {
575 				DRV_LOG(WARNING,
576 	"max_conf_threads is a PMD-level argument, not per device; "
577 	"only the first device configuration sets it. Current value is %d "
578 	"and will not be changed to %d.",
579 				conf_thread_mng.max_thrds, (int)tmp);
580 			}
581 		} else {
582 			priv->use_c_thread = false;
583 		}
584 	} else if (strcmp(key, "hw_latency_mode") == 0) {
585 		priv->hw_latency_mode = (uint32_t)tmp;
586 	} else if (strcmp(key, "hw_max_latency_us") == 0) {
587 		priv->hw_max_latency_us = (uint32_t)tmp;
588 	} else if (strcmp(key, "hw_max_pending_comp") == 0) {
589 		priv->hw_max_pending_comp = (uint32_t)tmp;
590 	} else if (strcmp(key, "queue_size") == 0) {
591 		priv->queue_size = (uint16_t)tmp;
592 	} else if (strcmp(key, "queues") == 0) {
593 		priv->queues = (uint16_t)tmp;
594 	} else {
595 		DRV_LOG(WARNING, "Invalid key %s.", key);
596 	}
597 	return 0;
598 }
599 
600 static void
601 mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
602 		     struct mlx5_vdpa_priv *priv)
603 {
604 	const char **params = (const char *[]){
605 		"event_core",
606 		"event_mode",
607 		"event_us",
608 		"hw_latency_mode",
609 		"hw_max_latency_us",
610 		"hw_max_pending_comp",
611 		"no_traffic_time",
612 		"queue_size",
613 		"queues",
614 		"max_conf_threads",
615 		NULL,
616 	};
617 
618 	priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
619 	priv->event_us = 0;
620 	priv->event_core = -1;
621 	priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
622 	if (mkvlist == NULL)
623 		return;
624 	mlx5_kvargs_process(mkvlist, params, mlx5_vdpa_args_check_handler,
625 			    priv);
626 	if (!priv->event_us &&
627 	    priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
628 		priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
629 	if ((priv->queue_size && !priv->queues) ||
630 		(!priv->queue_size && priv->queues)) {
631 		priv->queue_size = 0;
632 		priv->queues = 0;
633 		DRV_LOG(WARNING, "Please provide both queue_size and queues.");
634 	}
635 	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
636 	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
637 	DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
638 	DRV_LOG(DEBUG, "queues is %u, queue_size is %u.", priv->queues,
639 		priv->queue_size);
640 }
641 
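/*
 * Tears down the virtqs that were pre-created in queues/queue_size mode;
 * a no-op when pre-creation was not requested.
 */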
642 void
643 mlx5_vdpa_prepare_virtq_destroy(struct mlx5_vdpa_priv *priv)
644 {
645 	uint32_t max_queues, index;
646 	struct mlx5_vdpa_virtq *virtq;
647 
648 	if (!priv->queues || !priv->queue_size)
649 		return;
650 	max_queues = ((priv->queues * 2) < priv->caps.max_num_virtio_queues) ?
651 		(priv->queues * 2) : (priv->caps.max_num_virtio_queues);
652 	if (mlx5_vdpa_is_modify_virtq_supported(priv))
653 		mlx5_vdpa_steer_unset(priv);
654 	for (index = 0; index < max_queues; ++index) {
655 		virtq = &priv->virtqs[index];
656 		if (virtq->virtq) {
657 			pthread_mutex_lock(&virtq->virtq_lock);
658 			mlx5_vdpa_virtq_unset(virtq);
659 			pthread_mutex_unlock(&virtq->virtq_lock);
660 		}
661 	}
662 }
663 
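/*
 * Pre-creates virtq resources, optionally spreading the work over the
 * configuration thread pool: indexes that map to slot 0 stay on the caller,
 * the rest are queued as MLX5_VDPA_TASK_PREPARE_VIRTQ tasks, and the caller
 * waits on the remaining/error counters before updating steering.
 */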
664 static int
665 mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv)
666 {
667 	RTE_ATOMIC(uint32_t) remaining_cnt = 0;
668 	RTE_ATOMIC(uint32_t) err_cnt = 0;
669 	uint32_t task_num = 0;
670 	uint32_t max_queues, index, thrd_idx, data[1];
671 	struct mlx5_vdpa_virtq *virtq;
672 
673 	for (index = 0; index < priv->caps.max_num_virtio_queues;
674 		index++) {
675 		virtq = &priv->virtqs[index];
676 		pthread_mutex_init(&virtq->virtq_lock, NULL);
677 	}
678 	if (!priv->queues || !priv->queue_size)
679 		return 0;
680 	max_queues = (priv->queues < priv->caps.max_num_virtio_queues) ?
681 		(priv->queues * 2) : (priv->caps.max_num_virtio_queues);
682 	if (priv->use_c_thread) {
683 		uint32_t main_task_idx[max_queues];
684 
685 		for (index = 0; index < max_queues; ++index) {
686 			thrd_idx = index % (conf_thread_mng.max_thrds + 1);
687 			if (!thrd_idx) {
688 				main_task_idx[task_num] = index;
689 				task_num++;
690 				continue;
691 			}
692 			thrd_idx = priv->last_c_thrd_idx + 1;
693 			if (thrd_idx >= conf_thread_mng.max_thrds)
694 				thrd_idx = 0;
695 			priv->last_c_thrd_idx = thrd_idx;
696 			data[0] = index;
697 			if (mlx5_vdpa_task_add(priv, thrd_idx,
698 				MLX5_VDPA_TASK_PREPARE_VIRTQ,
699 				&remaining_cnt, &err_cnt,
700 				(void **)&data, 1)) {
701 				DRV_LOG(ERR, "Failed to add "
702 				"prepare virtq task (%d).", index);
703 				main_task_idx[task_num] = index;
704 				task_num++;
705 			}
706 		}
707 		for (index = 0; index < task_num; ++index)
708 			if (mlx5_vdpa_virtq_single_resource_prepare(priv,
709 				main_task_idx[index]))
710 				goto error;
711 		if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt,
712 			&err_cnt, 2000)) {
713 			DRV_LOG(ERR,
714 			"Failed to wait for virtq prepare tasks to complete.");
715 			goto error;
716 		}
717 	} else {
718 		for (index = 0; index < max_queues; ++index)
719 			if (mlx5_vdpa_virtq_single_resource_prepare(priv,
720 				index))
721 				goto error;
722 	}
723 	if (mlx5_vdpa_is_modify_virtq_supported(priv))
724 		if (mlx5_vdpa_steer_update(priv, true))
725 			goto error;
726 	return 0;
727 error:
728 	mlx5_vdpa_prepare_virtq_destroy(priv);
729 	return -1;
730 }
731 
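/*
 * One-time device resources: the VAR (doorbell) allocation is retried with
 * exponential backoff (it may still be held by QEMU across a vdpa restart),
 * then the doorbell page is mmap'ed and the TD/TIS objects, a null MR and
 * the Rx steering domain/table are created.
 */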
732 static int
733 mlx5_vdpa_create_dev_resources(struct mlx5_vdpa_priv *priv)
734 {
735 	struct mlx5_devx_tis_attr tis_attr = {0};
736 	struct ibv_context *ctx = priv->cdev->ctx;
737 	uint32_t i;
738 	int retry;
739 
740 	for (retry = 0; retry < 7; retry++) {
741 		priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
742 		if (priv->var != NULL)
743 			break;
744 		DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.", retry);
745 		/* Wait for QEMU to release the VAR during a vdpa restart; backoff starts at 0.1 sec. */
746 		usleep(100000U << retry);
747 	}
748 	if (!priv->var) {
749 		DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
750 		rte_errno = ENOMEM;
751 		return -rte_errno;
752 	}
753 	/* Always map the entire page. */
754 	priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
755 				   PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
756 				   priv->var->mmap_off);
757 	if (priv->virtq_db_addr == MAP_FAILED) {
758 		DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
759 		priv->virtq_db_addr = NULL;
760 		rte_errno = errno;
761 		return -rte_errno;
762 	}
763 	/* Add the within-page offset, needed on systems with 64K pages. */
764 	priv->virtq_db_addr = (char *)priv->virtq_db_addr +
765 		((rte_mem_page_size() - 1) & priv->caps.doorbell_bar_offset);
766 	DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
767 		priv->virtq_db_addr);
768 	priv->td = mlx5_devx_cmd_create_td(ctx);
769 	if (!priv->td) {
770 		DRV_LOG(ERR, "Failed to create transport domain.");
771 		rte_errno = errno;
772 		return -rte_errno;
773 	}
774 	tis_attr.transport_domain = priv->td->id;
775 	for (i = 0; i < priv->num_lag_ports; i++) {
776 		/* 0 means auto affinity; a non-zero value requests a specific LAG port. */
777 		tis_attr.lag_tx_port_affinity = i + 1;
778 		priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
779 		if (!priv->tiss[i]) {
780 			DRV_LOG(ERR, "Failed to create TIS %u.", i);
781 			return -rte_errno;
782 		}
783 	}
784 	priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
785 	if (!priv->null_mr) {
786 		DRV_LOG(ERR, "Failed to allocate null MR.");
787 		rte_errno = errno;
788 		return -rte_errno;
789 	}
790 	DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
791 #ifdef HAVE_MLX5DV_DR
792 	priv->steer.domain = mlx5_glue->dr_create_domain(ctx,
793 					MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
794 	if (!priv->steer.domain) {
795 		DRV_LOG(ERR, "Failed to create Rx domain.");
796 		rte_errno = errno;
797 		return -rte_errno;
798 	}
799 #endif
800 	priv->steer.tbl = mlx5_glue->dr_create_flow_tbl(priv->steer.domain, 0);
801 	if (!priv->steer.tbl) {
802 		DRV_LOG(ERR, "Failed to create table 0 with Rx domain.");
803 		rte_errno = errno;
804 		return -rte_errno;
805 	}
806 	if (mlx5_vdpa_err_event_setup(priv) != 0)
807 		return -rte_errno;
808 	if (mlx5_vdpa_event_qp_global_prepare(priv))
809 		return -rte_errno;
810 	if (mlx5_vdpa_virtq_resource_prepare(priv))
811 		return -rte_errno;
812 	return 0;
813 }
814 
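/*
 * Probe entry point: validates the vdpa HCA capabilities, allocates the
 * private structure sized for max_num_virtio_queues, parses devargs,
 * optionally creates the configuration thread pool, creates device resources
 * and registers the vDPA device with the vhost library.
 */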
815 static int
816 mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
817 		    struct mlx5_kvargs_ctrl *mkvlist)
818 {
819 	struct mlx5_vdpa_priv *priv = NULL;
820 	struct mlx5_hca_attr *attr = &cdev->config.hca_attr;
821 
822 	if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
823 		DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
824 			"old FW/OFED version?");
825 		rte_errno = ENOTSUP;
826 		return -rte_errno;
827 	}
828 	if (!attr->vdpa.queue_counters_valid)
829 		DRV_LOG(DEBUG, "No capability to support virtq statistics.");
830 	priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
831 			   sizeof(struct mlx5_vdpa_virtq) *
832 			   attr->vdpa.max_num_virtio_queues,
833 			   RTE_CACHE_LINE_SIZE);
834 	if (!priv) {
835 		DRV_LOG(ERR, "Failed to allocate private memory.");
836 		rte_errno = ENOMEM;
837 		return -rte_errno;
838 	}
839 	priv->caps = attr->vdpa;
840 	priv->log_max_rqt_size = attr->log_max_rqt_size;
841 	priv->num_lag_ports = attr->num_lag_ports;
842 	if (attr->num_lag_ports == 0)
843 		priv->num_lag_ports = 1;
844 	rte_spinlock_init(&priv->db_lock);
845 	pthread_mutex_init(&priv->steer_update_lock, NULL);
846 	priv->cdev = cdev;
847 	mlx5_vdpa_config_get(mkvlist, priv);
848 	if (priv->use_c_thread) {
849 		if (conf_thread_mng.initializer_priv == priv)
850 			if (mlx5_vdpa_mult_threads_create())
851 				goto error;
852 		rte_atomic_fetch_add_explicit(&conf_thread_mng.refcnt, 1,
853 			rte_memory_order_relaxed);
854 	}
855 	if (mlx5_vdpa_create_dev_resources(priv))
856 		goto error;
857 	priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
858 	if (priv->vdev == NULL) {
859 		DRV_LOG(ERR, "Failed to register vDPA device.");
860 		rte_errno = rte_errno ? rte_errno : EINVAL;
861 		goto error;
862 	}
863 	pthread_mutex_lock(&priv_list_lock);
864 	TAILQ_INSERT_TAIL(&priv_list, priv, next);
865 	pthread_mutex_unlock(&priv_list_lock);
866 	return 0;
867 error:
868 	if (conf_thread_mng.initializer_priv == priv)
869 		mlx5_vdpa_mult_threads_destroy(false);
870 	if (priv)
871 		mlx5_vdpa_dev_release(priv);
872 	return -rte_errno;
873 }
874 
875 static int
876 mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev)
877 {
878 	struct mlx5_vdpa_priv *priv = NULL;
879 	int found = 0;
880 
881 	pthread_mutex_lock(&priv_list_lock);
882 	TAILQ_FOREACH(priv, &priv_list, next) {
883 		if (priv->vdev->device == cdev->dev) {
884 			found = 1;
885 			break;
886 		}
887 	}
888 	if (found)
889 		TAILQ_REMOVE(&priv_list, priv, next);
890 	pthread_mutex_unlock(&priv_list_lock);
891 	if (found)
892 		mlx5_vdpa_dev_release(priv);
893 	return 0;
894 }
895 
896 static void
897 mlx5_vdpa_release_dev_resources(struct mlx5_vdpa_priv *priv)
898 {
899 	uint32_t i;
900 
901 	if (priv->queues)
902 		mlx5_vdpa_virtqs_cleanup(priv);
903 	mlx5_vdpa_dev_cache_clean(priv);
904 	for (i = 0; i < priv->caps.max_num_virtio_queues; i++) {
905 		if (!priv->virtqs[i].counters)
906 			continue;
907 		claim_zero(mlx5_devx_cmd_destroy(priv->virtqs[i].counters));
908 	}
909 	mlx5_vdpa_event_qp_global_release(priv);
910 	mlx5_vdpa_err_event_unset(priv);
911 	if (priv->steer.tbl)
912 		claim_zero(mlx5_glue->dr_destroy_flow_tbl(priv->steer.tbl));
913 	if (priv->steer.domain)
914 		claim_zero(mlx5_glue->dr_destroy_domain(priv->steer.domain));
915 	if (priv->null_mr)
916 		claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
917 	for (i = 0; i < priv->num_lag_ports; i++) {
918 		if (priv->tiss[i])
919 			claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
920 	}
921 	if (priv->td)
922 		claim_zero(mlx5_devx_cmd_destroy(priv->td));
923 	if (priv->virtq_db_addr)
924 		/* Mask out the within page offset for munmap. */
925 		claim_zero(munmap((void *)((uintptr_t)priv->virtq_db_addr &
926 			~(rte_mem_page_size() - 1)), priv->var->length));
927 	if (priv->var)
928 		mlx5_glue->dv_free_var(priv->var);
929 }
930 
931 static void
932 mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
933 {
934 	if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
935 		_internal_mlx5_vdpa_dev_close(priv, true);
936 	if (priv->use_c_thread)
937 		mlx5_vdpa_wait_dev_close_tasks_done(priv);
938 	mlx5_vdpa_release_dev_resources(priv);
939 	if (priv->vdev)
940 		rte_vdpa_unregister_device(priv->vdev);
941 	if (priv->use_c_thread)
942 		if (rte_atomic_fetch_sub_explicit(&conf_thread_mng.refcnt,
943 			1, rte_memory_order_relaxed) == 1)
944 			mlx5_vdpa_mult_threads_destroy(true);
945 	rte_free(priv);
946 }
947 
948 static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
949 	{
950 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
951 				PCI_DEVICE_ID_MELLANOX_CONNECTX6)
952 	},
953 	{
954 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
955 				PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
956 	},
957 	{
958 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
959 				PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
960 	},
961 	{
962 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
963 				PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
964 	},
965 	{
966 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
967 				PCI_DEVICE_ID_MELLANOX_BLUEFIELD2)
968 	},
969 	{
970 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
971 				PCI_DEVICE_ID_MELLANOX_CONNECTX6LX)
972 	},
973 	{
974 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
975 				PCI_DEVICE_ID_MELLANOX_CONNECTX7)
976 	},
977 	{
978 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
979 				PCI_DEVICE_ID_MELLANOX_BLUEFIELD3)
980 	},
981 	{
982 		.vendor_id = 0
983 	}
984 };
985 
986 static struct mlx5_class_driver mlx5_vdpa_driver = {
987 	.drv_class = MLX5_CLASS_VDPA,
988 	.name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
989 	.id_table = mlx5_vdpa_pci_id_map,
990 	.probe = mlx5_vdpa_dev_probe,
991 	.remove = mlx5_vdpa_dev_remove,
992 };
993 
994 RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)
995 
996 /**
997  * Driver initialization routine.
998  */
999 RTE_INIT(rte_mlx5_vdpa_init)
1000 {
1001 	mlx5_common_init();
1002 	if (mlx5_glue)
1003 		mlx5_class_driver_register(&mlx5_vdpa_driver);
1004 }
1005 
1006 RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
1007 RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
1008 RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");
1009