/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020-2021 Xilinx, Inc.
 */

#include <unistd.h>

#include <rte_common.h>
#include <rte_errno.h>
#include <rte_vfio.h>
#include <rte_vhost.h>

#include "efx.h"
#include "sfc_vdpa.h"
#include "sfc_vdpa_ops.h"

#ifndef PAGE_SIZE
#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
#endif

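/*
 * Allocate an IOVA-contiguous memzone for the MCDI buffer and map it
 * into the VFIO container at a fixed high IOVA.
 *
 * A minimal usage sketch (hypothetical caller; SFC_MCDI_BUF_LEN is an
 * assumed length, not defined in this file):
 *
 *	efsys_mem_t mem;
 *
 *	if (sfc_vdpa_dma_alloc(sva, "mcdi", SFC_MCDI_BUF_LEN, &mem) == 0) {
 *		use mem.esm_base (virtual) and mem.esm_addr (IOVA), then
 *		sfc_vdpa_dma_free(sva, &mem);
 *	}
 */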
int
sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
		   size_t len, efsys_mem_t *esmp)
{
	uint64_t mcdi_iova;
	size_t mcdi_buff_size;
	char mz_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz = NULL;
	int numa_node = sva->pdev->device.numa_node;
	int ret;

	mcdi_buff_size = RTE_ALIGN_CEIL(len, PAGE_SIZE);
	ret = snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "%s_%s",
		       sva->pdev->name, name);
	if (ret < 0 || ret >= RTE_MEMZONE_NAMESIZE) {
		sfc_vdpa_err(sva, "%s_%s too long to fit in mz_name",
			     sva->pdev->name, name);
		return -EINVAL;
	}

	sfc_vdpa_log_init(sva, "name=%s, len=%zu", mz_name, len);

	mz = rte_memzone_reserve_aligned(mz_name, mcdi_buff_size,
					 numa_node,
					 RTE_MEMZONE_IOVA_CONTIG,
					 PAGE_SIZE);
	if (mz == NULL) {
		sfc_vdpa_err(sva, "cannot reserve memory for %s: len=%#x: %s",
			     mz_name, (unsigned int)len,
			     rte_strerror(rte_errno));
		return -ENOMEM;
	}

	/* The IOVA for the MCDI buffer is re-calculated if mapping at the
	 * default IOVA fails.
	 * TODO: Originally there was no way to query the valid IOVA range.
	 * A patch to retrieve it via the VFIO_IOMMU_GET_INFO ioctl is
	 * available in kernel versions >= 5.4; deriving the default MCDI
	 * IOVA from that range will be added later. Until then the default
	 * IOVA is kept in high memory at 2TB, and on overlap the address
	 * is halved until an available one is found.
	 */
	mcdi_iova = SFC_VDPA_DEFAULT_MCDI_IOVA;

	for (;;) {
		ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
						 (uint64_t)mz->addr, mcdi_iova,
						 mcdi_buff_size);
		if (ret == 0)
			break;

		mcdi_iova = mcdi_iova >> 1;
		if (mcdi_iova < mcdi_buff_size) {
			sfc_vdpa_err(sva,
				     "DMA mapping failed for MCDI : %s",
				     rte_strerror(rte_errno));
			rte_memzone_free(mz);
			return ret;
		}
	}

	esmp->esm_addr = mcdi_iova;
	esmp->esm_base = mz->addr;
	/* Keep the memzone handle: sfc_vdpa_dma_free() dereferences it to
	 * report its name and to release it.
	 */
	esmp->esm_mz = mz;
	sva->mcdi_buff_size = mcdi_buff_size;

	sfc_vdpa_info(sva,
		      "DMA name=%s len=%zu => virt=%p iova=0x%" PRIx64,
		      name, len, esmp->esm_base, esmp->esm_addr);

	return 0;
}

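/*
 * Unmap the MCDI buffer from the VFIO container and release the
 * memzone reserved by sfc_vdpa_dma_alloc().
 */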
void
sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp)
{
	int ret;

	sfc_vdpa_log_init(sva, "name=%s", esmp->esm_mz->name);

	ret = rte_vfio_container_dma_unmap(sva->vfio_container_fd,
					   (uint64_t)esmp->esm_base,
					   esmp->esm_addr, sva->mcdi_buff_size);
	if (ret < 0)
		sfc_vdpa_err(sva, "DMA unmap failed for MCDI : %s",
			     rte_strerror(rte_errno));

	sfc_vdpa_info(sva,
		      "DMA free name=%s => virt=%p iova=0x%" PRIx64,
		      esmp->esm_mz->name, esmp->esm_base, esmp->esm_addr);

	/* The buffer was reserved as a memzone, so release it with
	 * rte_memzone_free() rather than rte_free().
	 */
	rte_memzone_free(esmp->esm_mz);

	sva->mcdi_buff_size = 0;
	memset(esmp, 0, sizeof(*esmp));
}

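/*
 * Map (do_map == true) or unmap (do_map == false) every guest memory
 * region reported by vhost into the VFIO container, so the device can
 * DMA directly to/from guest physical addresses. If mapping one region
 * fails, the regions mapped so far are rolled back.
 */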
int
sfc_vdpa_dma_map(struct sfc_vdpa_ops_data *ops_data, bool do_map)
{
	uint32_t i, j;
	int rc;
	struct rte_vhost_memory *vhost_mem = NULL;
	struct rte_vhost_mem_region *mem_reg = NULL;
	int vfio_container_fd;
	void *dev;

	dev = ops_data->dev_handle;
	vfio_container_fd =
		sfc_vdpa_adapter_by_dev_handle(dev)->vfio_container_fd;

	rc = rte_vhost_get_mem_table(ops_data->vid, &vhost_mem);
	if (rc < 0) {
		sfc_vdpa_err(dev,
			     "failed to get VM memory layout");
		goto error;
	}

	for (i = 0; i < vhost_mem->nregions; i++) {
		mem_reg = &vhost_mem->regions[i];

		if (do_map) {
			rc = rte_vfio_container_dma_map(vfio_container_fd,
						mem_reg->host_user_addr,
						mem_reg->guest_phys_addr,
						mem_reg->size);
			if (rc < 0) {
				sfc_vdpa_err(dev,
					     "DMA map failed : %s",
					     rte_strerror(rte_errno));
				goto failed_vfio_dma_map;
			}
		} else {
			rc = rte_vfio_container_dma_unmap(vfio_container_fd,
						mem_reg->host_user_addr,
						mem_reg->guest_phys_addr,
						mem_reg->size);
			if (rc < 0) {
				sfc_vdpa_err(dev,
					     "DMA unmap failed : %s",
					     rte_strerror(rte_errno));
				goto error;
			}
		}
	}

	free(vhost_mem);

	return 0;

failed_vfio_dma_map:
	for (j = 0; j < i; j++) {
		mem_reg = &vhost_mem->regions[j];
		rte_vfio_container_dma_unmap(vfio_container_fd,
					     mem_reg->host_user_addr,
					     mem_reg->guest_phys_addr,
					     mem_reg->size);
	}

error:
	free(vhost_mem);

	return rc;
}

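/*
 * Set up access to the PCI memory BAR that exposes the function
 * control window.
 */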
static int
sfc_vdpa_mem_bar_init(struct sfc_vdpa_adapter *sva,
		      const efx_bar_region_t *mem_ebrp)
{
	struct rte_pci_device *pci_dev = sva->pdev;
	efsys_bar_t *ebp = &sva->mem_bar;
	struct rte_mem_resource *res =
		&pci_dev->mem_resource[mem_ebrp->ebr_index];

	SFC_BAR_LOCK_INIT(ebp, pci_dev->name);
	ebp->esb_rid = mem_ebrp->ebr_index;
	ebp->esb_dev = pci_dev;
	ebp->esb_base = res->addr;

	return 0;
}

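/*
 * Release the memory BAR state set up by sfc_vdpa_mem_bar_init().
 */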
static void
sfc_vdpa_mem_bar_fini(struct sfc_vdpa_adapter *sva)
{
	efsys_bar_t *ebp = &sva->mem_bar;

	SFC_BAR_LOCK_DESTROY(ebp);
	memset(ebp, 0, sizeof(*ebp));
}

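/*
 * Probe the NIC without requesting a specific firmware variant.
 */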
static int
sfc_vdpa_nic_probe(struct sfc_vdpa_adapter *sva)
{
	efx_nic_t *enp = sva->nic;
	int rc;

	rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
	if (rc != 0)
		sfc_vdpa_err(sva, "nic probe failed: %s", rte_strerror(rc));

	return rc;
}

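/*
 * Advertise the driver's minimum and maximum queue limits to the
 * firmware, initialize the NIC, and derive the usable number of queue
 * pairs from the VI pool the firmware actually allocated.
 */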
static int
sfc_vdpa_estimate_resource_limits(struct sfc_vdpa_adapter *sva)
{
	efx_drv_limits_t limits;
	int rc;
	uint32_t evq_allocated;
	uint32_t rxq_allocated;
	uint32_t txq_allocated;
	uint32_t max_queue_cnt;

	memset(&limits, 0, sizeof(limits));

	/* Request at least one Rx and Tx queue */
	limits.edl_min_rxq_count = 1;
	limits.edl_min_txq_count = 1;
	/* Management event queue plus an event queue per Tx/Rx queue pair */
	limits.edl_min_evq_count =
		1 + RTE_MAX(limits.edl_min_rxq_count, limits.edl_min_txq_count);

	limits.edl_max_rxq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
	limits.edl_max_txq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
	limits.edl_max_evq_count = 1 + SFC_VDPA_MAX_QUEUE_PAIRS;

	SFC_VDPA_ASSERT(limits.edl_max_evq_count >= limits.edl_min_evq_count);
	SFC_VDPA_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
	SFC_VDPA_ASSERT(limits.edl_max_txq_count >= limits.edl_min_txq_count);

	/* Configure the minimum required resources needed for the
	 * driver to operate, and the maximum desired resources that the
	 * driver is capable of using.
	 */
	sfc_vdpa_log_init(sva, "set drv limit");
	efx_nic_set_drv_limits(sva->nic, &limits);

	sfc_vdpa_log_init(sva, "init nic");
	rc = efx_nic_init(sva->nic);
	if (rc != 0) {
		sfc_vdpa_err(sva, "nic init failed: %s", rte_strerror(rc));
		goto fail_nic_init;
	}

	/* Find resource dimensions assigned by firmware to this function */
	rc = efx_nic_get_vi_pool(sva->nic, &evq_allocated, &rxq_allocated,
				 &txq_allocated);
	if (rc != 0) {
		sfc_vdpa_err(sva, "vi pool get failed: %s", rte_strerror(rc));
		goto fail_get_vi_pool;
	}

	/* The firmware may still allocate more than the maximum requested;
	 * enforce the driver limits.
	 */
	evq_allocated = RTE_MIN(evq_allocated, limits.edl_max_evq_count);
	rxq_allocated = RTE_MIN(rxq_allocated, limits.edl_max_rxq_count);
	txq_allocated = RTE_MIN(txq_allocated, limits.edl_max_txq_count);

	max_queue_cnt = RTE_MIN(rxq_allocated, txq_allocated);
	/* Subtract the management EVQ, which is not used for traffic */
	max_queue_cnt = RTE_MIN(evq_allocated - 1, max_queue_cnt);

	SFC_VDPA_ASSERT(max_queue_cnt > 0);

	sva->max_queue_count = max_queue_cnt;
	sfc_vdpa_log_init(sva, "NIC init done with %u pair(s) of queues",
			  max_queue_cnt);

	return 0;

fail_get_vi_pool:
	efx_nic_fini(sva->nic);
fail_nic_init:
	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
	return rc;
}

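/*
 * Bring up the hardware: map the memory BAR, create and probe the NIC,
 * initialize MCDI, size the queue resources, and initialize the virtio
 * and filter modules. Each failure path unwinds the preceding steps in
 * reverse order.
 */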
int
sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva)
{
	efx_bar_region_t mem_ebr;
	efx_nic_t *enp;
	int rc;

	sfc_vdpa_log_init(sva, "entry");

	sfc_vdpa_log_init(sva, "get family");
	rc = sfc_efx_family(sva->pdev, &mem_ebr, &sva->family);
	if (rc != 0)
		goto fail_family;
	sfc_vdpa_log_init(sva,
			  "family is %u, membar is %d, "
			  "function control window offset is %#" PRIx64,
			  sva->family, mem_ebr.ebr_index, mem_ebr.ebr_offset);

	sfc_vdpa_log_init(sva, "init mem bar");
	rc = sfc_vdpa_mem_bar_init(sva, &mem_ebr);
	if (rc != 0)
		goto fail_mem_bar_init;

	sfc_vdpa_log_init(sva, "create nic");
	rte_spinlock_init(&sva->nic_lock);
	rc = efx_nic_create(sva->family, (efsys_identifier_t *)sva,
			    &sva->mem_bar, mem_ebr.ebr_offset,
			    &sva->nic_lock, &enp);
	if (rc != 0) {
		sfc_vdpa_err(sva, "nic create failed: %s", rte_strerror(rc));
		goto fail_nic_create;
	}
	sva->nic = enp;

	sfc_vdpa_log_init(sva, "init mcdi");
	rc = sfc_vdpa_mcdi_init(sva);
	if (rc != 0) {
		sfc_vdpa_err(sva, "mcdi init failed: %s", rte_strerror(rc));
		goto fail_mcdi_init;
	}

	sfc_vdpa_log_init(sva, "probe nic");
	rc = sfc_vdpa_nic_probe(sva);
	if (rc != 0)
		goto fail_nic_probe;

	sfc_vdpa_log_init(sva, "reset nic");
	rc = efx_nic_reset(enp);
	if (rc != 0) {
		sfc_vdpa_err(sva, "nic reset failed: %s", rte_strerror(rc));
		goto fail_nic_reset;
	}

	sfc_vdpa_log_init(sva, "estimate resource limits");
	rc = sfc_vdpa_estimate_resource_limits(sva);
	if (rc != 0)
		goto fail_estimate_rsrc_limits;

	sfc_vdpa_log_init(sva, "init virtio");
	rc = efx_virtio_init(enp);
	if (rc != 0) {
		sfc_vdpa_err(sva, "virtio init failed: %s", rte_strerror(rc));
		goto fail_virtio_init;
	}

	sfc_vdpa_log_init(sva, "init filter");
	rc = efx_filter_init(enp);
	if (rc != 0) {
		sfc_vdpa_err(sva, "filter init failed: %s", rte_strerror(rc));
		goto fail_filter_init;
	}

	sfc_vdpa_log_init(sva, "done");

	return 0;

fail_filter_init:
	efx_virtio_fini(enp);

fail_virtio_init:
	efx_nic_fini(enp);

fail_estimate_rsrc_limits:
fail_nic_reset:
	efx_nic_unprobe(enp);

fail_nic_probe:
	sfc_vdpa_mcdi_fini(sva);

fail_mcdi_init:
	sfc_vdpa_log_init(sva, "destroy nic");
	sva->nic = NULL;
	efx_nic_destroy(enp);

fail_nic_create:
	sfc_vdpa_mem_bar_fini(sva);

fail_mem_bar_init:
fail_family:
	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
	return rc;
}

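/*
 * Tear down the hardware state set up by sfc_vdpa_hw_init(), in
 * reverse order of initialization.
 */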
void
sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva)
{
	efx_nic_t *enp = sva->nic;

	sfc_vdpa_log_init(sva, "entry");

	sfc_vdpa_log_init(sva, "virtio fini");
	efx_virtio_fini(enp);

	sfc_vdpa_log_init(sva, "unprobe nic");
	efx_nic_unprobe(enp);

	sfc_vdpa_log_init(sva, "mcdi fini");
	sfc_vdpa_mcdi_fini(sva);

	sfc_vdpa_log_init(sva, "nic fini");
	efx_nic_fini(enp);

	sfc_vdpa_log_init(sva, "destroy nic");
	sva->nic = NULL;
	efx_nic_destroy(enp);

	sfc_vdpa_mem_bar_fini(sva);
}