xref: /dpdk/drivers/vdpa/mlx5/mlx5_vdpa_mem.c (revision e12a0166c80f65e35408f4715b2f3a60763c3741)
1cc07a42dSMatan Azrad /* SPDX-License-Identifier: BSD-3-Clause
2cc07a42dSMatan Azrad  * Copyright 2019 Mellanox Technologies, Ltd
3cc07a42dSMatan Azrad  */
4cc07a42dSMatan Azrad #include <stdlib.h>
5cc07a42dSMatan Azrad 
6cc07a42dSMatan Azrad #include <rte_malloc.h>
7cc07a42dSMatan Azrad #include <rte_errno.h>
8cc07a42dSMatan Azrad #include <rte_common.h>
9cc07a42dSMatan Azrad #include <rte_sched_common.h>
10cc07a42dSMatan Azrad 
11cc07a42dSMatan Azrad #include <mlx5_prm.h>
12cc07a42dSMatan Azrad #include <mlx5_common.h>
13cc07a42dSMatan Azrad 
14cc07a42dSMatan Azrad #include "mlx5_vdpa_utils.h"
15cc07a42dSMatan Azrad #include "mlx5_vdpa.h"
16cc07a42dSMatan Azrad 
17cc07a42dSMatan Azrad void
mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv * priv)18cc07a42dSMatan Azrad mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv *priv)
19cc07a42dSMatan Azrad {
2006ebaaeaSLi Zhang 	struct mlx5_vdpa_query_mr *mrs =
2106ebaaeaSLi Zhang 		(struct mlx5_vdpa_query_mr *)priv->mrs;
22cc07a42dSMatan Azrad 	struct mlx5_vdpa_query_mr *entry;
2306ebaaeaSLi Zhang 	int i;
24cc07a42dSMatan Azrad 
2506ebaaeaSLi Zhang 	if (priv->mrs) {
2606ebaaeaSLi Zhang 		for (i = priv->num_mrs - 1; i >= 0; i--) {
2706ebaaeaSLi Zhang 			entry = &mrs[i];
2806ebaaeaSLi Zhang 			if (entry->is_indirect) {
2906ebaaeaSLi Zhang 				if (entry->mkey)
3006ebaaeaSLi Zhang 					claim_zero(
3106ebaaeaSLi Zhang 					mlx5_devx_cmd_destroy(entry->mkey));
3206ebaaeaSLi Zhang 			} else {
3306ebaaeaSLi Zhang 				if (entry->mr)
3406ebaaeaSLi Zhang 					claim_zero(
3506ebaaeaSLi Zhang 					mlx5_glue->dereg_mr(entry->mr));
36cc07a42dSMatan Azrad 			}
37cc07a42dSMatan Azrad 		}
3806ebaaeaSLi Zhang 		rte_free(priv->mrs);
3906ebaaeaSLi Zhang 		priv->mrs = NULL;
4006ebaaeaSLi Zhang 		priv->num_mrs = 0;
4106ebaaeaSLi Zhang 	}
4206ebaaeaSLi Zhang 	if (priv->vmem_info.vmem) {
4306ebaaeaSLi Zhang 		free(priv->vmem_info.vmem);
4406ebaaeaSLi Zhang 		priv->vmem_info.vmem = NULL;
4506ebaaeaSLi Zhang 	}
4606ebaaeaSLi Zhang 	priv->gpa_mkey_index = 0;
47cc07a42dSMatan Azrad }
48cc07a42dSMatan Azrad 
49cc07a42dSMatan Azrad static int
mlx5_vdpa_regions_addr_cmp(const void * a,const void * b)50cc07a42dSMatan Azrad mlx5_vdpa_regions_addr_cmp(const void *a, const void *b)
51cc07a42dSMatan Azrad {
52cc07a42dSMatan Azrad 	const struct rte_vhost_mem_region *region_a = a;
53cc07a42dSMatan Azrad 	const struct rte_vhost_mem_region *region_b = b;
54cc07a42dSMatan Azrad 
55cc07a42dSMatan Azrad 	if (region_a->guest_phys_addr < region_b->guest_phys_addr)
56cc07a42dSMatan Azrad 		return -1;
57cc07a42dSMatan Azrad 	if (region_a->guest_phys_addr > region_b->guest_phys_addr)
58cc07a42dSMatan Azrad 		return 1;
59cc07a42dSMatan Azrad 	return 0;
60cc07a42dSMatan Azrad }
61cc07a42dSMatan Azrad 
62cc07a42dSMatan Azrad #define KLM_NUM_MAX_ALIGN(sz) (RTE_ALIGN_CEIL(sz, MLX5_MAX_KLM_BYTE_COUNT) / \
63cc07a42dSMatan Azrad 			       MLX5_MAX_KLM_BYTE_COUNT)
64cc07a42dSMatan Azrad 
65cc07a42dSMatan Azrad /*
66cc07a42dSMatan Azrad  * Allocate and sort the region list and choose indirect mkey mode:
67cc07a42dSMatan Azrad  *   1. Calculate GCD, guest memory size and indirect mkey entries num per mode.
68cc07a42dSMatan Azrad  *   2. Align GCD to the maximum allowed size(2G) and to be power of 2.
69cc07a42dSMatan Azrad  *   2. Decide the indirect mkey mode according to the next rules:
70cc07a42dSMatan Azrad  *         a. If both KLM_FBS entries number and KLM entries number are bigger
71cc07a42dSMatan Azrad  *            than the maximum allowed(MLX5_DEVX_MAX_KLM_ENTRIES) - error.
72cc07a42dSMatan Azrad  *         b. KLM mode if KLM_FBS entries number is bigger than the maximum
73cc07a42dSMatan Azrad  *            allowed(MLX5_DEVX_MAX_KLM_ENTRIES).
74cc07a42dSMatan Azrad  *         c. KLM mode if GCD is smaller than the minimum allowed(4K).
75cc07a42dSMatan Azrad  *         d. KLM mode if the total size of KLM entries is in one cache line
76cc07a42dSMatan Azrad  *            and the total size of KLM_FBS entries is not in one cache line.
77cc07a42dSMatan Azrad  *         e. Otherwise, KLM_FBS mode.
78cc07a42dSMatan Azrad  */
79cc07a42dSMatan Azrad static struct rte_vhost_memory *
mlx5_vdpa_vhost_mem_regions_prepare(int vid,uint8_t * mode,uint64_t * mem_size,uint64_t * gcd,uint32_t * entries_num)80cc07a42dSMatan Azrad mlx5_vdpa_vhost_mem_regions_prepare(int vid, uint8_t *mode, uint64_t *mem_size,
81cc07a42dSMatan Azrad 				    uint64_t *gcd, uint32_t *entries_num)
82cc07a42dSMatan Azrad {
83cc07a42dSMatan Azrad 	struct rte_vhost_memory *mem;
84cc07a42dSMatan Azrad 	uint64_t size;
85cc07a42dSMatan Azrad 	uint64_t klm_entries_num = 0;
86cc07a42dSMatan Azrad 	uint64_t klm_fbs_entries_num;
87cc07a42dSMatan Azrad 	uint32_t i;
88cc07a42dSMatan Azrad 	int ret = rte_vhost_get_mem_table(vid, &mem);
89cc07a42dSMatan Azrad 
90cc07a42dSMatan Azrad 	if (ret < 0) {
91cc07a42dSMatan Azrad 		DRV_LOG(ERR, "Failed to get VM memory layout vid =%d.", vid);
92cc07a42dSMatan Azrad 		rte_errno = EINVAL;
93cc07a42dSMatan Azrad 		return NULL;
94cc07a42dSMatan Azrad 	}
95cc07a42dSMatan Azrad 	qsort(mem->regions, mem->nregions, sizeof(mem->regions[0]),
96cc07a42dSMatan Azrad 	      mlx5_vdpa_regions_addr_cmp);
97cc07a42dSMatan Azrad 	*mem_size = (mem->regions[(mem->nregions - 1)].guest_phys_addr) +
98cc07a42dSMatan Azrad 				      (mem->regions[(mem->nregions - 1)].size) -
99cc07a42dSMatan Azrad 					      (mem->regions[0].guest_phys_addr);
100cc07a42dSMatan Azrad 	*gcd = 0;
101cc07a42dSMatan Azrad 	for (i = 0; i < mem->nregions; ++i) {
102cc07a42dSMatan Azrad 		DRV_LOG(INFO,  "Region %u: HVA 0x%" PRIx64 ", GPA 0x%" PRIx64
103cc07a42dSMatan Azrad 			", size 0x%" PRIx64 ".", i,
104cc07a42dSMatan Azrad 			mem->regions[i].host_user_addr,
105cc07a42dSMatan Azrad 			mem->regions[i].guest_phys_addr, mem->regions[i].size);
106cc07a42dSMatan Azrad 		if (i > 0) {
107cc07a42dSMatan Azrad 			/* Hole handle. */
108cc07a42dSMatan Azrad 			size = mem->regions[i].guest_phys_addr -
109cc07a42dSMatan Azrad 				(mem->regions[i - 1].guest_phys_addr +
110cc07a42dSMatan Azrad 				 mem->regions[i - 1].size);
1116e914454SXueming Li 			*gcd = rte_get_gcd64(*gcd, size);
112cc07a42dSMatan Azrad 			klm_entries_num += KLM_NUM_MAX_ALIGN(size);
113cc07a42dSMatan Azrad 		}
114cc07a42dSMatan Azrad 		size = mem->regions[i].size;
1156e914454SXueming Li 		*gcd = rte_get_gcd64(*gcd, size);
116cc07a42dSMatan Azrad 		klm_entries_num += KLM_NUM_MAX_ALIGN(size);
117cc07a42dSMatan Azrad 	}
118cc07a42dSMatan Azrad 	if (*gcd > MLX5_MAX_KLM_BYTE_COUNT)
1196e914454SXueming Li 		*gcd = rte_get_gcd64(*gcd, MLX5_MAX_KLM_BYTE_COUNT);
120cc07a42dSMatan Azrad 	if (!RTE_IS_POWER_OF_2(*gcd)) {
121cc07a42dSMatan Azrad 		uint64_t candidate_gcd = rte_align64prevpow2(*gcd);
122cc07a42dSMatan Azrad 
123cc07a42dSMatan Azrad 		while (candidate_gcd > 1 && (*gcd % candidate_gcd))
124cc07a42dSMatan Azrad 			candidate_gcd /= 2;
125cc07a42dSMatan Azrad 		DRV_LOG(DEBUG, "GCD 0x%" PRIx64 " is not power of 2. Adjusted "
126cc07a42dSMatan Azrad 			"GCD is 0x%" PRIx64 ".", *gcd, candidate_gcd);
127cc07a42dSMatan Azrad 		*gcd = candidate_gcd;
128cc07a42dSMatan Azrad 	}
129cc07a42dSMatan Azrad 	klm_fbs_entries_num = *mem_size / *gcd;
130cc07a42dSMatan Azrad 	if (*gcd < MLX5_MIN_KLM_FIXED_BUFFER_SIZE || klm_fbs_entries_num >
131cc07a42dSMatan Azrad 	    MLX5_DEVX_MAX_KLM_ENTRIES ||
132cc07a42dSMatan Azrad 	    ((klm_entries_num * sizeof(struct mlx5_klm)) <=
133cc07a42dSMatan Azrad 	    RTE_CACHE_LINE_SIZE && (klm_fbs_entries_num *
134cc07a42dSMatan Azrad 				    sizeof(struct mlx5_klm)) >
135cc07a42dSMatan Azrad 							RTE_CACHE_LINE_SIZE)) {
136cc07a42dSMatan Azrad 		*mode = MLX5_MKC_ACCESS_MODE_KLM;
137cc07a42dSMatan Azrad 		*entries_num = klm_entries_num;
138cc07a42dSMatan Azrad 		DRV_LOG(INFO, "Indirect mkey mode is KLM.");
139cc07a42dSMatan Azrad 	} else {
140cc07a42dSMatan Azrad 		*mode = MLX5_MKC_ACCESS_MODE_KLM_FBS;
141cc07a42dSMatan Azrad 		*entries_num = klm_fbs_entries_num;
142cc07a42dSMatan Azrad 		DRV_LOG(INFO, "Indirect mkey mode is KLM Fixed Buffer Size.");
143cc07a42dSMatan Azrad 	}
144cc07a42dSMatan Azrad 	DRV_LOG(DEBUG, "Memory registration information: nregions = %u, "
145cc07a42dSMatan Azrad 		"mem_size = 0x%" PRIx64 ", GCD = 0x%" PRIx64
146cc07a42dSMatan Azrad 		", klm_fbs_entries_num = 0x%" PRIx64 ", klm_entries_num = 0x%"
147cc07a42dSMatan Azrad 		PRIx64 ".", mem->nregions, *mem_size, *gcd, klm_fbs_entries_num,
148cc07a42dSMatan Azrad 		klm_entries_num);
149cc07a42dSMatan Azrad 	if (*entries_num > MLX5_DEVX_MAX_KLM_ENTRIES) {
150cc07a42dSMatan Azrad 		DRV_LOG(ERR, "Failed to prepare memory of vid %d - memory is "
151cc07a42dSMatan Azrad 			"too fragmented.", vid);
152cc07a42dSMatan Azrad 		free(mem);
153cc07a42dSMatan Azrad 		return NULL;
154cc07a42dSMatan Azrad 	}
155cc07a42dSMatan Azrad 	return mem;
156cc07a42dSMatan Azrad }
157cc07a42dSMatan Azrad 
158934ef2b6SXueming Li static int
mlx5_vdpa_mem_cmp(struct rte_vhost_memory * mem1,struct rte_vhost_memory * mem2)159934ef2b6SXueming Li mlx5_vdpa_mem_cmp(struct rte_vhost_memory *mem1, struct rte_vhost_memory *mem2)
160934ef2b6SXueming Li {
161934ef2b6SXueming Li 	uint32_t i;
162934ef2b6SXueming Li 
163934ef2b6SXueming Li 	if (mem1->nregions != mem2->nregions)
164934ef2b6SXueming Li 		return -1;
165934ef2b6SXueming Li 	for (i = 0; i < mem1->nregions; i++) {
166934ef2b6SXueming Li 		if (mem1->regions[i].guest_phys_addr !=
167934ef2b6SXueming Li 		    mem2->regions[i].guest_phys_addr)
168934ef2b6SXueming Li 			return -1;
169934ef2b6SXueming Li 		if (mem1->regions[i].size != mem2->regions[i].size)
170934ef2b6SXueming Li 			return -1;
171934ef2b6SXueming Li 	}
172934ef2b6SXueming Li 	return 0;
173934ef2b6SXueming Li }
174934ef2b6SXueming Li 
175cc07a42dSMatan Azrad #define KLM_SIZE_MAX_ALIGN(sz) ((sz) > MLX5_MAX_KLM_BYTE_COUNT ? \
176cc07a42dSMatan Azrad 				MLX5_MAX_KLM_BYTE_COUNT : (sz))
177cc07a42dSMatan Azrad 
17806ebaaeaSLi Zhang static int
mlx5_vdpa_create_indirect_mkey(struct mlx5_vdpa_priv * priv)17906ebaaeaSLi Zhang mlx5_vdpa_create_indirect_mkey(struct mlx5_vdpa_priv *priv)
180cc07a42dSMatan Azrad {
181cc07a42dSMatan Azrad 	struct mlx5_devx_mkey_attr mkey_attr;
18206ebaaeaSLi Zhang 	struct mlx5_vdpa_query_mr *mrs =
18306ebaaeaSLi Zhang 		(struct mlx5_vdpa_query_mr *)priv->mrs;
18406ebaaeaSLi Zhang 	struct mlx5_vdpa_query_mr *entry;
18506ebaaeaSLi Zhang 	struct rte_vhost_mem_region *reg;
18606ebaaeaSLi Zhang 	uint8_t mode = priv->vmem_info.mode;
18706ebaaeaSLi Zhang 	uint32_t entries_num = priv->vmem_info.entries_num;
18806ebaaeaSLi Zhang 	struct rte_vhost_memory *mem = priv->vmem_info.vmem;
189cc07a42dSMatan Azrad 	struct mlx5_klm klm_array[entries_num];
19006ebaaeaSLi Zhang 	uint64_t gcd = priv->vmem_info.gcd;
19106ebaaeaSLi Zhang 	int ret = -rte_errno;
19206ebaaeaSLi Zhang 	uint64_t klm_size;
19306ebaaeaSLi Zhang 	int klm_index = 0;
19406ebaaeaSLi Zhang 	uint64_t k;
19506ebaaeaSLi Zhang 	uint32_t i;
196cc07a42dSMatan Azrad 
19706ebaaeaSLi Zhang 	/* If it is the last entry, create indirect mkey. */
198cc07a42dSMatan Azrad 	for (i = 0; i < mem->nregions; i++) {
19906ebaaeaSLi Zhang 		entry = &mrs[i];
200cc07a42dSMatan Azrad 		reg = &mem->regions[i];
201cc07a42dSMatan Azrad 		if (i > 0) {
202cc07a42dSMatan Azrad 			uint64_t sadd;
203cc07a42dSMatan Azrad 			uint64_t empty_region_sz = reg->guest_phys_addr -
204cc07a42dSMatan Azrad 					  (mem->regions[i - 1].guest_phys_addr +
205cc07a42dSMatan Azrad 					   mem->regions[i - 1].size);
206cc07a42dSMatan Azrad 
207cc07a42dSMatan Azrad 			if (empty_region_sz > 0) {
208cc07a42dSMatan Azrad 				sadd = mem->regions[i - 1].guest_phys_addr +
209cc07a42dSMatan Azrad 				       mem->regions[i - 1].size;
210cc07a42dSMatan Azrad 				klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
211cc07a42dSMatan Azrad 				      KLM_SIZE_MAX_ALIGN(empty_region_sz) : gcd;
212cc07a42dSMatan Azrad 				for (k = 0; k < empty_region_sz;
213cc07a42dSMatan Azrad 				     k += klm_size) {
214cc07a42dSMatan Azrad 					klm_array[klm_index].byte_count =
215cc07a42dSMatan Azrad 						k + klm_size > empty_region_sz ?
216cc07a42dSMatan Azrad 						 empty_region_sz - k : klm_size;
217cc07a42dSMatan Azrad 					klm_array[klm_index].mkey =
218cc07a42dSMatan Azrad 							    priv->null_mr->lkey;
219cc07a42dSMatan Azrad 					klm_array[klm_index].address = sadd + k;
220cc07a42dSMatan Azrad 					klm_index++;
221cc07a42dSMatan Azrad 				}
222cc07a42dSMatan Azrad 			}
223cc07a42dSMatan Azrad 		}
224cc07a42dSMatan Azrad 		klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
225cc07a42dSMatan Azrad 					    KLM_SIZE_MAX_ALIGN(reg->size) : gcd;
226cc07a42dSMatan Azrad 		for (k = 0; k < reg->size; k += klm_size) {
227cc07a42dSMatan Azrad 			klm_array[klm_index].byte_count = k + klm_size >
228cc07a42dSMatan Azrad 					   reg->size ? reg->size - k : klm_size;
22904b4e4cbSMichael Baum 			klm_array[klm_index].mkey = entry->mr->lkey;
230cc07a42dSMatan Azrad 			klm_array[klm_index].address = reg->guest_phys_addr + k;
231cc07a42dSMatan Azrad 			klm_index++;
232cc07a42dSMatan Azrad 		}
233cc07a42dSMatan Azrad 	}
23404b4e4cbSMichael Baum 	memset(&mkey_attr, 0, sizeof(mkey_attr));
235cc07a42dSMatan Azrad 	mkey_attr.addr = (uintptr_t)(mem->regions[0].guest_phys_addr);
23606ebaaeaSLi Zhang 	mkey_attr.size = priv->vmem_info.size;
237e35ccf24SMichael Baum 	mkey_attr.pd = priv->cdev->pdn;
238cc07a42dSMatan Azrad 	mkey_attr.umem_id = 0;
239cc07a42dSMatan Azrad 	/* Must be zero for KLM mode. */
240cc07a42dSMatan Azrad 	mkey_attr.log_entity_size = mode == MLX5_MKC_ACCESS_MODE_KLM_FBS ?
241cc07a42dSMatan Azrad 							  rte_log2_u64(gcd) : 0;
242cc07a42dSMatan Azrad 	mkey_attr.pg_access = 0;
243cc07a42dSMatan Azrad 	mkey_attr.klm_array = klm_array;
244cc07a42dSMatan Azrad 	mkey_attr.klm_num = klm_index;
24506ebaaeaSLi Zhang 	entry = &mrs[mem->nregions];
246662d0dc6SMichael Baum 	entry->mkey = mlx5_devx_cmd_mkey_create(priv->cdev->ctx, &mkey_attr);
247cc07a42dSMatan Azrad 	if (!entry->mkey) {
248cc07a42dSMatan Azrad 		DRV_LOG(ERR, "Failed to create indirect Mkey.");
24906ebaaeaSLi Zhang 		rte_errno = -ret;
25006ebaaeaSLi Zhang 		return ret;
251cc07a42dSMatan Azrad 	}
252cc07a42dSMatan Azrad 	entry->is_indirect = 1;
253cc07a42dSMatan Azrad 	priv->gpa_mkey_index = entry->mkey->id;
254cc07a42dSMatan Azrad 	return 0;
25506ebaaeaSLi Zhang }
25606ebaaeaSLi Zhang 
25706ebaaeaSLi Zhang /*
25806ebaaeaSLi Zhang  * The target here is to group all the physical memory regions of the
25906ebaaeaSLi Zhang  * virtio device in one indirect mkey.
26006ebaaeaSLi Zhang  * For KLM Fixed Buffer Size mode (HW find the translation entry in one
26106ebaaeaSLi Zhang  * read according to the guest phisical address):
26206ebaaeaSLi Zhang  * All the sub-direct mkeys of it must be in the same size, hence, each
26306ebaaeaSLi Zhang  * one of them should be in the GCD size of all the virtio memory
26406ebaaeaSLi Zhang  * regions and the holes between them.
26506ebaaeaSLi Zhang  * For KLM mode (each entry may be in different size so HW must iterate
26606ebaaeaSLi Zhang  * the entries):
26706ebaaeaSLi Zhang  * Each virtio memory region and each hole between them have one entry,
26806ebaaeaSLi Zhang  * just need to cover the maximum allowed size(2G) by splitting entries
26906ebaaeaSLi Zhang  * which their associated memory regions are bigger than 2G.
27006ebaaeaSLi Zhang  * It means that each virtio memory region may be mapped to more than
27106ebaaeaSLi Zhang  * one direct mkey in the 2 modes.
27206ebaaeaSLi Zhang  * All the holes of invalid memory between the virtio memory regions
27306ebaaeaSLi Zhang  * will be mapped to the null memory region for security.
27406ebaaeaSLi Zhang  */
27506ebaaeaSLi Zhang int
mlx5_vdpa_mem_register(struct mlx5_vdpa_priv * priv)27606ebaaeaSLi Zhang mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv)
27706ebaaeaSLi Zhang {
27806ebaaeaSLi Zhang 	void *mrs;
27906ebaaeaSLi Zhang 	uint8_t mode = 0;
28006ebaaeaSLi Zhang 	int ret = -rte_errno;
28106ebaaeaSLi Zhang 	uint32_t i, thrd_idx, data[1];
282*e12a0166STyler Retzlaff 	RTE_ATOMIC(uint32_t) remaining_cnt = 0;
283*e12a0166STyler Retzlaff 	RTE_ATOMIC(uint32_t) err_cnt = 0;
284*e12a0166STyler Retzlaff 	uint32_t task_num = 0;
28506ebaaeaSLi Zhang 	struct rte_vhost_memory *mem = mlx5_vdpa_vhost_mem_regions_prepare
28606ebaaeaSLi Zhang 			(priv->vid, &mode, &priv->vmem_info.size,
28706ebaaeaSLi Zhang 			&priv->vmem_info.gcd, &priv->vmem_info.entries_num);
28806ebaaeaSLi Zhang 
28906ebaaeaSLi Zhang 	if (!mem)
29006ebaaeaSLi Zhang 		return -rte_errno;
29106ebaaeaSLi Zhang 	if (priv->vmem_info.vmem != NULL) {
29206ebaaeaSLi Zhang 		if (mlx5_vdpa_mem_cmp(mem, priv->vmem_info.vmem) == 0) {
29306ebaaeaSLi Zhang 			/* VM memory not changed, reuse resources. */
29406ebaaeaSLi Zhang 			free(mem);
29506ebaaeaSLi Zhang 			return 0;
29606ebaaeaSLi Zhang 		}
29706ebaaeaSLi Zhang 		mlx5_vdpa_mem_dereg(priv);
29806ebaaeaSLi Zhang 	}
29906ebaaeaSLi Zhang 	priv->vmem_info.vmem = mem;
30006ebaaeaSLi Zhang 	priv->vmem_info.mode = mode;
30106ebaaeaSLi Zhang 	priv->num_mrs = mem->nregions;
30206ebaaeaSLi Zhang 	if (!priv->num_mrs || priv->num_mrs >= MLX5_VDPA_MAX_MRS) {
30306ebaaeaSLi Zhang 		DRV_LOG(ERR,
30406ebaaeaSLi Zhang 		"Invalid number of memory regions.");
30506ebaaeaSLi Zhang 		goto error;
30606ebaaeaSLi Zhang 	}
30706ebaaeaSLi Zhang 	/* The last one is indirect mkey entry. */
30806ebaaeaSLi Zhang 	priv->num_mrs++;
30906ebaaeaSLi Zhang 	mrs = rte_zmalloc("mlx5 vDPA memory regions",
31006ebaaeaSLi Zhang 		sizeof(struct mlx5_vdpa_query_mr) * priv->num_mrs, 0);
31106ebaaeaSLi Zhang 	priv->mrs = mrs;
31206ebaaeaSLi Zhang 	if (!priv->mrs) {
31306ebaaeaSLi Zhang 		DRV_LOG(ERR, "Failed to allocate private memory regions.");
31406ebaaeaSLi Zhang 		goto error;
31506ebaaeaSLi Zhang 	}
31606ebaaeaSLi Zhang 	if (priv->use_c_thread) {
31706ebaaeaSLi Zhang 		uint32_t main_task_idx[mem->nregions];
31806ebaaeaSLi Zhang 
31906ebaaeaSLi Zhang 		for (i = 0; i < mem->nregions; i++) {
32006ebaaeaSLi Zhang 			thrd_idx = i % (conf_thread_mng.max_thrds + 1);
32106ebaaeaSLi Zhang 			if (!thrd_idx) {
32206ebaaeaSLi Zhang 				main_task_idx[task_num] = i;
32306ebaaeaSLi Zhang 				task_num++;
32406ebaaeaSLi Zhang 				continue;
32506ebaaeaSLi Zhang 			}
32606ebaaeaSLi Zhang 			thrd_idx = priv->last_c_thrd_idx + 1;
32706ebaaeaSLi Zhang 			if (thrd_idx >= conf_thread_mng.max_thrds)
32806ebaaeaSLi Zhang 				thrd_idx = 0;
32906ebaaeaSLi Zhang 			priv->last_c_thrd_idx = thrd_idx;
33006ebaaeaSLi Zhang 			data[0] = i;
33106ebaaeaSLi Zhang 			if (mlx5_vdpa_task_add(priv, thrd_idx,
33206ebaaeaSLi Zhang 				MLX5_VDPA_TASK_REG_MR,
33306ebaaeaSLi Zhang 				&remaining_cnt, &err_cnt,
33406ebaaeaSLi Zhang 				(void **)&data, 1)) {
33506ebaaeaSLi Zhang 				DRV_LOG(ERR,
33606ebaaeaSLi Zhang 				"Fail to add task mem region (%d)", i);
33706ebaaeaSLi Zhang 				main_task_idx[task_num] = i;
33806ebaaeaSLi Zhang 				task_num++;
33906ebaaeaSLi Zhang 			}
34006ebaaeaSLi Zhang 		}
34106ebaaeaSLi Zhang 		for (i = 0; i < task_num; i++) {
34206ebaaeaSLi Zhang 			ret = mlx5_vdpa_register_mr(priv,
34306ebaaeaSLi Zhang 					main_task_idx[i]);
34406ebaaeaSLi Zhang 			if (ret) {
34506ebaaeaSLi Zhang 				DRV_LOG(ERR,
34606ebaaeaSLi Zhang 				"Failed to register mem region %d.", i);
34706ebaaeaSLi Zhang 				goto error;
34806ebaaeaSLi Zhang 			}
34906ebaaeaSLi Zhang 		}
35006ebaaeaSLi Zhang 		if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt,
35106ebaaeaSLi Zhang 			&err_cnt, 100)) {
35206ebaaeaSLi Zhang 			DRV_LOG(ERR,
35306ebaaeaSLi Zhang 			"Failed to wait register mem region tasks ready.");
35406ebaaeaSLi Zhang 			goto error;
35506ebaaeaSLi Zhang 		}
35606ebaaeaSLi Zhang 	} else {
35706ebaaeaSLi Zhang 		for (i = 0; i < mem->nregions; i++) {
35806ebaaeaSLi Zhang 			ret = mlx5_vdpa_register_mr(priv, i);
35906ebaaeaSLi Zhang 			if (ret) {
36006ebaaeaSLi Zhang 				DRV_LOG(ERR,
36106ebaaeaSLi Zhang 				"Failed to register mem region %d.", i);
36206ebaaeaSLi Zhang 				goto error;
36306ebaaeaSLi Zhang 			}
36406ebaaeaSLi Zhang 		}
36506ebaaeaSLi Zhang 	}
36606ebaaeaSLi Zhang 	ret = mlx5_vdpa_create_indirect_mkey(priv);
36706ebaaeaSLi Zhang 	if (ret) {
36806ebaaeaSLi Zhang 		DRV_LOG(ERR, "Failed to create indirect mkey .");
36906ebaaeaSLi Zhang 		goto error;
37006ebaaeaSLi Zhang 	}
37106ebaaeaSLi Zhang 	return 0;
372cc07a42dSMatan Azrad error:
373cc07a42dSMatan Azrad 	mlx5_vdpa_mem_dereg(priv);
374cc07a42dSMatan Azrad 	rte_errno = -ret;
375cc07a42dSMatan Azrad 	return ret;
376cc07a42dSMatan Azrad }
37706ebaaeaSLi Zhang 
37806ebaaeaSLi Zhang int
mlx5_vdpa_register_mr(struct mlx5_vdpa_priv * priv,uint32_t idx)37906ebaaeaSLi Zhang mlx5_vdpa_register_mr(struct mlx5_vdpa_priv *priv, uint32_t idx)
38006ebaaeaSLi Zhang {
38106ebaaeaSLi Zhang 	struct rte_vhost_memory *mem = priv->vmem_info.vmem;
38206ebaaeaSLi Zhang 	struct mlx5_vdpa_query_mr *mrs =
38306ebaaeaSLi Zhang 		(struct mlx5_vdpa_query_mr *)priv->mrs;
38406ebaaeaSLi Zhang 	struct mlx5_vdpa_query_mr *entry;
38506ebaaeaSLi Zhang 	struct rte_vhost_mem_region *reg;
38606ebaaeaSLi Zhang 	int ret;
38706ebaaeaSLi Zhang 
38806ebaaeaSLi Zhang 	reg = &mem->regions[idx];
38906ebaaeaSLi Zhang 	entry = &mrs[idx];
39006ebaaeaSLi Zhang 	entry->mr = mlx5_glue->reg_mr_iova
39106ebaaeaSLi Zhang 				      (priv->cdev->pd,
39206ebaaeaSLi Zhang 				       (void *)(uintptr_t)(reg->host_user_addr),
39306ebaaeaSLi Zhang 				       reg->size, reg->guest_phys_addr,
39406ebaaeaSLi Zhang 				       IBV_ACCESS_LOCAL_WRITE);
39506ebaaeaSLi Zhang 	if (!entry->mr) {
39606ebaaeaSLi Zhang 		DRV_LOG(ERR, "Failed to create direct Mkey.");
39706ebaaeaSLi Zhang 		ret = -rte_errno;
39806ebaaeaSLi Zhang 		return ret;
39906ebaaeaSLi Zhang 	}
40006ebaaeaSLi Zhang 	entry->is_indirect = 0;
40106ebaaeaSLi Zhang 	return 0;
40206ebaaeaSLi Zhang }
403