/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 NVIDIA Corporation & Affiliates
 */
#include <string.h>
#include <unistd.h>
#include <sys/eventfd.h>

#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_io.h>
#include <rte_alarm.h>
#include <rte_tailq.h>
#include <rte_ring_elem.h>
#include <rte_ring_peek.h>

#include <mlx5_common.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"

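/*
 * Dequeue up to 'n' tasks from a per-thread ring in all-or-nothing mode:
 * if fewer than 'n' entries are available, nothing is consumed and 0 is
 * returned.
 */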
static inline uint32_t
mlx5_vdpa_c_thrd_ring_dequeue_bulk(struct rte_ring *r,
		void **obj, uint32_t n, uint32_t *avail)
{
	uint32_t m;

	m = rte_ring_dequeue_bulk_elem_start(r, obj,
		sizeof(struct mlx5_vdpa_task), n, avail);
	n = (m == n) ? n : 0;
	rte_ring_dequeue_elem_finish(r, n);
	return n;
}

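/*
 * Enqueue 'n' tasks to a per-thread ring in all-or-nothing mode:
 * if there is not enough free room for all 'n' entries, nothing is
 * enqueued and 0 is returned.
 */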
static inline uint32_t
mlx5_vdpa_c_thrd_ring_enqueue_bulk(struct rte_ring *r,
		void * const *obj, uint32_t n, uint32_t *free)
{
	uint32_t m;

	m = rte_ring_enqueue_bulk_elem_start(r, n, free);
	n = (m == n) ? n : 0;
	rte_ring_enqueue_elem_finish(r, obj,
		sizeof(struct mlx5_vdpa_task), n);
	return n;
}

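/*
 * Build a batch of tasks, enqueue it to the ring of the selected
 * configuration thread, bump the caller's remaining-task counter and wake
 * the thread up. Returns false on success, true if the batch could not be
 * enqueued.
 */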
bool
mlx5_vdpa_task_add(struct mlx5_vdpa_priv *priv,
		uint32_t thrd_idx,
		enum mlx5_vdpa_task_type task_type,
		RTE_ATOMIC(uint32_t) *remaining_cnt, RTE_ATOMIC(uint32_t) *err_cnt,
		void **task_data, uint32_t num)
{
	struct rte_ring *rng = conf_thread_mng.cthrd[thrd_idx].rng;
	struct mlx5_vdpa_task task[MLX5_VDPA_TASKS_PER_DEV];
	uint32_t *data = (uint32_t *)task_data;
	uint32_t i;

	MLX5_ASSERT(num <= MLX5_VDPA_TASKS_PER_DEV);
	for (i = 0; i < num; i++) {
		task[i].priv = priv;
		task[i].type = task_type;
		task[i].remaining_cnt = remaining_cnt;
		task[i].err_cnt = err_cnt;
		if (data)
			task[i].idx = data[i];
	}
	if (!mlx5_vdpa_c_thrd_ring_enqueue_bulk(rng, (void **)&task, num, NULL))
		return true;
	for (i = 0; i < num; i++)
		if (task[i].remaining_cnt)
			rte_atomic_fetch_add_explicit(task[i].remaining_cnt, 1,
				rte_memory_order_relaxed);
	/* Wake up the configuration thread. */
	pthread_mutex_lock(&conf_thread_mng.cthrd_lock);
	pthread_cond_signal(&conf_thread_mng.cthrd[thrd_idx].c_cond);
	pthread_mutex_unlock(&conf_thread_mng.cthrd_lock);
	return false;
}

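/*
 * Poll (with a sleep between checks) until all outstanding tasks have been
 * completed, then report whether any of them failed.
 */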
bool
mlx5_vdpa_c_thread_wait_bulk_tasks_done(RTE_ATOMIC(uint32_t) *remaining_cnt,
		RTE_ATOMIC(uint32_t) *err_cnt, uint32_t sleep_time)
{
	/* Check and wait until all tasks are done. */
	while (rte_atomic_load_explicit(remaining_cnt,
			rte_memory_order_relaxed) != 0) {
		rte_delay_us_sleep(sleep_time);
	}
	if (rte_atomic_load_explicit(err_cnt,
			rte_memory_order_relaxed)) {
		DRV_LOG(ERR, "Tasks done with error.");
		return true;
	}
	return false;
}

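/*
 * Main loop of a vDPA configuration thread: find the worker slot matching
 * the current thread id, then repeatedly dequeue one task at a time,
 * execute it and update the submitter's counters. Sleeps on the condition
 * variable while the ring is empty.
 */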
static uint32_t
mlx5_vdpa_c_thread_handle(void *arg)
{
	struct mlx5_vdpa_conf_thread_mng *multhrd = arg;
	struct mlx5_vdpa_virtq *virtq;
	struct mlx5_vdpa_priv *priv;
	struct mlx5_vdpa_task task;
	struct rte_ring *rng;
	uint64_t features;
	uint32_t thrd_idx;
	uint32_t task_num;
	int ret;

	for (thrd_idx = 0; thrd_idx < multhrd->max_thrds;
		thrd_idx++)
		if (rte_thread_equal(multhrd->cthrd[thrd_idx].tid, rte_thread_self()))
			break;
	if (thrd_idx >= multhrd->max_thrds)
		return 1;
	rng = multhrd->cthrd[thrd_idx].rng;
	while (1) {
		task_num = mlx5_vdpa_c_thrd_ring_dequeue_bulk(rng,
				(void **)&task, 1, NULL);
		if (!task_num) {
			/* No task, wait on the condition variable. */
			pthread_mutex_lock(&multhrd->cthrd_lock);
			pthread_cond_wait(
				&multhrd->cthrd[thrd_idx].c_cond,
				&multhrd->cthrd_lock);
			pthread_mutex_unlock(&multhrd->cthrd_lock);
			continue;
		}
		priv = task.priv;
		if (priv == NULL)
			continue;
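		/* Dispatch the dequeued task according to its type. */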
		switch (task.type) {
		case MLX5_VDPA_TASK_REG_MR:
			ret = mlx5_vdpa_register_mr(priv, task.idx);
			if (ret) {
				DRV_LOG(ERR,
					"Failed to register mr %d.", task.idx);
				rte_atomic_fetch_add_explicit(task.err_cnt, 1,
					rte_memory_order_relaxed);
			}
			break;
		case MLX5_VDPA_TASK_SETUP_VIRTQ:
			virtq = &priv->virtqs[task.idx];
			pthread_mutex_lock(&virtq->virtq_lock);
			ret = mlx5_vdpa_virtq_setup(priv,
				task.idx, false);
			if (ret) {
				DRV_LOG(ERR,
					"Failed to setup virtq %d.", task.idx);
				rte_atomic_fetch_add_explicit(
					task.err_cnt, 1, rte_memory_order_relaxed);
			}
			virtq->enable = 1;
			pthread_mutex_unlock(&virtq->virtq_lock);
			break;
		case MLX5_VDPA_TASK_STOP_VIRTQ:
			virtq = &priv->virtqs[task.idx];
			pthread_mutex_lock(&virtq->virtq_lock);
			ret = mlx5_vdpa_virtq_stop(priv,
				task.idx);
			if (ret) {
				DRV_LOG(ERR,
					"Failed to stop virtq %d.",
					task.idx);
				rte_atomic_fetch_add_explicit(
					task.err_cnt, 1,
					rte_memory_order_relaxed);
				pthread_mutex_unlock(&virtq->virtq_lock);
				break;
			}
			ret = rte_vhost_get_negotiated_features(
				priv->vid, &features);
			if (ret) {
				DRV_LOG(ERR,
					"Failed to get negotiated features virtq %d.",
					task.idx);
				rte_atomic_fetch_add_explicit(
					task.err_cnt, 1,
					rte_memory_order_relaxed);
				pthread_mutex_unlock(&virtq->virtq_lock);
				break;
			}
			if (RTE_VHOST_NEED_LOG(features))
				rte_vhost_log_used_vring(
					priv->vid, task.idx, 0,
					MLX5_VDPA_USED_RING_LEN(virtq->vq_size));
			pthread_mutex_unlock(&virtq->virtq_lock);
			break;
		case MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT:
			pthread_mutex_lock(&priv->steer_update_lock);
			mlx5_vdpa_steer_unset(priv);
			pthread_mutex_unlock(&priv->steer_update_lock);
			mlx5_vdpa_virtqs_release(priv, false);
			mlx5_vdpa_drain_cq(priv);
			if (priv->lm_mr.addr)
				mlx5_os_wrapped_mkey_destroy(
					&priv->lm_mr);
			if (!priv->connected)
				mlx5_vdpa_dev_cache_clean(priv);
			priv->vid = 0;
			rte_atomic_store_explicit(
				&priv->dev_close_progress, 0,
				rte_memory_order_relaxed);
			break;
		case MLX5_VDPA_TASK_PREPARE_VIRTQ:
			ret = mlx5_vdpa_virtq_single_resource_prepare(
				priv, task.idx);
			if (ret) {
				DRV_LOG(ERR,
					"Failed to prepare virtq %d.",
					task.idx);
				rte_atomic_fetch_add_explicit(
					task.err_cnt, 1,
					rte_memory_order_relaxed);
			}
			break;
		default:
			DRV_LOG(ERR, "Invalid vdpa task type %d.",
				task.type);
			break;
		}
		if (task.remaining_cnt)
			rte_atomic_fetch_sub_explicit(task.remaining_cnt,
				1, rte_memory_order_relaxed);
	}
	return 0;
}

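/*
 * Cancel and join one configuration thread (if it was created) and free its
 * task ring. When need_unlock is set, the shared cthrd_lock is
 * re-initialized.
 */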
static void
mlx5_vdpa_c_thread_destroy(uint32_t thrd_idx, bool need_unlock)
{
	pthread_t *tid = (pthread_t *)&conf_thread_mng.cthrd[thrd_idx].tid.opaque_id;

	if (*tid != 0) {
		pthread_cancel(*tid);
		rte_thread_join(conf_thread_mng.cthrd[thrd_idx].tid, NULL);
		*tid = 0;
		if (need_unlock)
			pthread_mutex_init(&conf_thread_mng.cthrd_lock, NULL);
	}
	if (conf_thread_mng.cthrd[thrd_idx].rng) {
		rte_ring_free(conf_thread_mng.cthrd[thrd_idx].rng);
		conf_thread_mng.cthrd[thrd_idx].rng = NULL;
	}
}

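/*
 * Create one task ring and one internal control thread per configured
 * worker, all under cthrd_lock. On any failure, every worker created so far
 * is destroyed.
 */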
static int
mlx5_vdpa_c_thread_create(void)
{
	uint32_t thrd_idx;
	uint32_t ring_num;
	char name[RTE_RING_NAMESIZE];
	int ret;

	pthread_mutex_lock(&conf_thread_mng.cthrd_lock);
	ring_num = MLX5_VDPA_MAX_TASKS_PER_THRD / conf_thread_mng.max_thrds;
	if (!ring_num) {
		DRV_LOG(ERR, "Invalid ring number for thread.");
		goto c_thread_err;
	}
	for (thrd_idx = 0; thrd_idx < conf_thread_mng.max_thrds;
		thrd_idx++) {
		snprintf(name, sizeof(name), "vDPA-mthread-ring-%d",
			thrd_idx);
		conf_thread_mng.cthrd[thrd_idx].rng = rte_ring_create_elem(name,
			sizeof(struct mlx5_vdpa_task), ring_num,
			rte_socket_id(),
			RING_F_MP_HTS_ENQ | RING_F_MC_HTS_DEQ |
			RING_F_EXACT_SZ);
		if (!conf_thread_mng.cthrd[thrd_idx].rng) {
			DRV_LOG(ERR,
				"Failed to create vdpa multi-threads %d ring.",
				thrd_idx);
			goto c_thread_err;
		}
		snprintf(name, RTE_THREAD_INTERNAL_NAME_SIZE, "vmlx5-c%d", thrd_idx);
		ret = rte_thread_create_internal_control(&conf_thread_mng.cthrd[thrd_idx].tid,
				name,
				mlx5_vdpa_c_thread_handle, &conf_thread_mng);
		if (ret) {
			DRV_LOG(ERR, "Failed to create vdpa multi-threads %d.",
				thrd_idx);
			goto c_thread_err;
		}
		pthread_cond_init(&conf_thread_mng.cthrd[thrd_idx].c_cond,
			NULL);
	}
	pthread_mutex_unlock(&conf_thread_mng.cthrd_lock);
	return 0;
c_thread_err:
	for (thrd_idx = 0; thrd_idx < conf_thread_mng.max_thrds;
		thrd_idx++)
		mlx5_vdpa_c_thread_destroy(thrd_idx, false);
	pthread_mutex_unlock(&conf_thread_mng.cthrd_lock);
	return -1;
}

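/*
 * Initialize the shared lock and spawn the configuration threads.
 * Returns 0 on success, -1 on failure after releasing whatever was
 * partially created.
 */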
int
mlx5_vdpa_mult_threads_create(void)
{
	pthread_mutex_init(&conf_thread_mng.cthrd_lock, NULL);
	if (mlx5_vdpa_c_thread_create()) {
		DRV_LOG(ERR, "Cannot create vDPA configuration threads.");
		mlx5_vdpa_mult_threads_destroy(false);
		return -1;
	}
	return 0;
}

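/*
 * Tear down all configuration threads and their rings, destroy the shared
 * lock and clear the management structure. Does nothing if the thread pool
 * was never initialized.
 */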
void
mlx5_vdpa_mult_threads_destroy(bool need_unlock)
{
	uint32_t thrd_idx;

	if (!conf_thread_mng.initializer_priv)
		return;
	for (thrd_idx = 0; thrd_idx < conf_thread_mng.max_thrds;
		thrd_idx++)
		mlx5_vdpa_c_thread_destroy(thrd_idx, need_unlock);
	pthread_mutex_destroy(&conf_thread_mng.cthrd_lock);
	memset(&conf_thread_mng, 0, sizeof(struct mlx5_vdpa_conf_thread_mng));
}