/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020-2021 Xilinx, Inc.
 */

#include <unistd.h>

#include <rte_common.h>
#include <rte_errno.h>
#include <rte_vfio.h>
#include <rte_vhost.h>

#include "efx.h"
#include "sfc_vdpa.h"
#include "sfc_vdpa_ops.h"

#ifndef PAGE_SIZE
#define PAGE_SIZE (sysconf(_SC_PAGESIZE))
#endif

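/*
 * Allocate an IOVA-contiguous memzone for the MCDI buffer and map it
 * into the VFIO container at a fixed default IOVA. If that mapping
 * fails, the candidate IOVA is halved until a free range is found or
 * the candidate becomes smaller than the buffer itself.
 */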
int
sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
		   size_t len, efsys_mem_t *esmp)
{
	uint64_t mcdi_iova;
	size_t mcdi_buff_size;
	char mz_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz = NULL;
	int numa_node = sva->pdev->device.numa_node;
	int ret;

	mcdi_buff_size = RTE_ALIGN_CEIL(len, PAGE_SIZE);
	ret = snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "%s_%s",
		       sva->pdev->name, name);
	if (ret < 0 || ret >= RTE_MEMZONE_NAMESIZE) {
		sfc_vdpa_err(sva, "%s_%s too long to fit in mz_name",
			     sva->pdev->name, name);
		return -EINVAL;
	}

	sfc_vdpa_log_init(sva, "name=%s, len=%zu", mz_name, len);

	mz = rte_memzone_reserve_aligned(mz_name, mcdi_buff_size,
					 numa_node,
					 RTE_MEMZONE_IOVA_CONTIG,
					 PAGE_SIZE);
	if (mz == NULL) {
		sfc_vdpa_err(sva, "cannot reserve memory for %s: len=%#x: %s",
			     mz_name, (unsigned int)len,
			     rte_strerror(rte_errno));
		return -ENOMEM;
	}

	/* The IOVA for the MCDI buffer is re-calculated if mapping at the
	 * default IOVA fails.
	 * TODO: Earlier there was no way to get a valid IOVA range.
	 * A patch has since been submitted to get the IOVA range via the
	 * VFIO_IOMMU_GET_INFO ioctl; it is available in kernel versions
	 * >= 5.4. Support for deriving the default MCDI buffer IOVA from
	 * the available IOVA range will be added later. Meanwhile the
	 * default IOVA for the MCDI buffer is kept in high memory at 2TB.
	 * In case of overlap, new available addresses are searched and
	 * used instead.
	 */
	mcdi_iova = SFC_VDPA_DEFAULT_MCDI_IOVA;

	for (;;) {
		ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
						 (uint64_t)mz->addr, mcdi_iova,
						 mcdi_buff_size);
		if (ret == 0)
			break;

		mcdi_iova = mcdi_iova >> 1;
		if (mcdi_iova < mcdi_buff_size) {
			sfc_vdpa_err(sva,
				     "DMA mapping failed for MCDI: %s",
				     rte_strerror(rte_errno));
			rte_memzone_free(mz);
			return ret;
		}
	}

	esmp->esm_mz = mz;
	esmp->esm_addr = mcdi_iova;
	esmp->esm_base = mz->addr;
	sva->mcdi_buff_size = mcdi_buff_size;

	sfc_vdpa_info(sva,
		      "DMA name=%s len=%zu => virt=%p iova=0x%" PRIx64,
		      name, len, esmp->esm_base, esmp->esm_addr);

	return 0;
}

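/*
 * Release an MCDI buffer obtained from sfc_vdpa_dma_alloc(): unmap it
 * from the VFIO container and free the backing memzone.
 */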
void
sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp)
{
	int ret;

	sfc_vdpa_log_init(sva, "name=%s", esmp->esm_mz->name);

	ret = rte_vfio_container_dma_unmap(sva->vfio_container_fd,
					   (uint64_t)esmp->esm_base,
					   esmp->esm_addr, sva->mcdi_buff_size);
	if (ret < 0)
		sfc_vdpa_err(sva, "DMA unmap failed for MCDI: %s",
			     rte_strerror(rte_errno));

	sfc_vdpa_info(sva,
		      "DMA free name=%s => virt=%p iova=0x%" PRIx64,
		      esmp->esm_mz->name, esmp->esm_base, esmp->esm_addr);

	rte_memzone_free(esmp->esm_mz);

	sva->mcdi_buff_size = 0;
	memset(esmp, 0, sizeof(*esmp));
}

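/*
 * Map (do_map == true) or unmap (do_map == false) every region of the
 * guest memory layout in the VFIO container, using the guest physical
 * address as the IOVA. On a map failure, the regions mapped so far are
 * rolled back.
 */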
int
sfc_vdpa_dma_map(struct sfc_vdpa_ops_data *ops_data, bool do_map)
{
	uint32_t i, j;
	int rc;
	struct rte_vhost_memory *vhost_mem = NULL;
	struct rte_vhost_mem_region *mem_reg = NULL;
	int vfio_container_fd;
	void *dev;

	dev = ops_data->dev_handle;
	vfio_container_fd =
		sfc_vdpa_adapter_by_dev_handle(dev)->vfio_container_fd;

	rc = rte_vhost_get_mem_table(ops_data->vid, &vhost_mem);
	if (rc < 0) {
		sfc_vdpa_err(dev, "failed to get VM memory layout");
		goto error;
	}

	for (i = 0; i < vhost_mem->nregions; i++) {
		mem_reg = &vhost_mem->regions[i];

		if (do_map) {
			rc = rte_vfio_container_dma_map(vfio_container_fd,
						mem_reg->host_user_addr,
						mem_reg->guest_phys_addr,
						mem_reg->size);
			if (rc < 0) {
				sfc_vdpa_err(dev, "DMA map failed: %s",
					     rte_strerror(rte_errno));
				goto failed_vfio_dma_map;
			}
		} else {
			rc = rte_vfio_container_dma_unmap(vfio_container_fd,
						mem_reg->host_user_addr,
						mem_reg->guest_phys_addr,
						mem_reg->size);
			if (rc < 0) {
				sfc_vdpa_err(dev, "DMA unmap failed: %s",
					     rte_strerror(rte_errno));
				goto error;
			}
		}
	}

	free(vhost_mem);

	return 0;

failed_vfio_dma_map:
	for (j = 0; j < i; j++) {
		mem_reg = &vhost_mem->regions[j];
		rte_vfio_container_dma_unmap(vfio_container_fd,
					     mem_reg->host_user_addr,
					     mem_reg->guest_phys_addr,
					     mem_reg->size);
	}

error:
	free(vhost_mem);

	return rc;
}

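/*
 * Set up the efsys BAR handle over the PCI memory resource that holds
 * the function control window.
 */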
static int
sfc_vdpa_mem_bar_init(struct sfc_vdpa_adapter *sva,
		      const efx_bar_region_t *mem_ebrp)
{
	struct rte_pci_device *pci_dev = sva->pdev;
	efsys_bar_t *ebp = &sva->mem_bar;
	struct rte_mem_resource *res =
		&pci_dev->mem_resource[mem_ebrp->ebr_index];

	SFC_BAR_LOCK_INIT(ebp, pci_dev->name);
	ebp->esb_rid = mem_ebrp->ebr_index;
	ebp->esb_dev = pci_dev;
	ebp->esb_base = res->addr;

	return 0;
}

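/* Tear down the efsys BAR handle set up by sfc_vdpa_mem_bar_init() */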
static void
sfc_vdpa_mem_bar_fini(struct sfc_vdpa_adapter *sva)
{
	efsys_bar_t *ebp = &sva->mem_bar;

	SFC_BAR_LOCK_DESTROY(ebp);
	memset(ebp, 0, sizeof(*ebp));
}

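/* Probe the NIC with no preference for a particular firmware variant */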
static int
sfc_vdpa_nic_probe(struct sfc_vdpa_adapter *sva)
{
	efx_nic_t *enp = sva->nic;
	int rc;

	rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
	if (rc != 0)
		sfc_vdpa_err(sva, "nic probe failed: %s", rte_strerror(rc));

	return rc;
}

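/*
 * Advertise the driver's minimum and maximum queue requirements to the
 * firmware, then query the VI pool actually granted to this function
 * and derive the number of usable queue pairs. One event queue is
 * reserved for management and carries no traffic.
 */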
static int
sfc_vdpa_estimate_resource_limits(struct sfc_vdpa_adapter *sva)
{
	efx_drv_limits_t limits;
	int rc;
	uint32_t evq_allocated;
	uint32_t rxq_allocated;
	uint32_t txq_allocated;
	uint32_t max_queue_cnt;

	memset(&limits, 0, sizeof(limits));

	/* Request at least one Rx and Tx queue */
	limits.edl_min_rxq_count = 1;
	limits.edl_min_txq_count = 1;
	/* Management event queue plus event queue for Tx/Rx queue */
	limits.edl_min_evq_count =
		1 + RTE_MAX(limits.edl_min_rxq_count, limits.edl_min_txq_count);

	limits.edl_max_rxq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
	limits.edl_max_txq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
	limits.edl_max_evq_count = 1 + SFC_VDPA_MAX_QUEUE_PAIRS;

	SFC_VDPA_ASSERT(limits.edl_max_evq_count >= limits.edl_min_evq_count);
	SFC_VDPA_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
	SFC_VDPA_ASSERT(limits.edl_max_txq_count >= limits.edl_min_txq_count);

	/* Configure the minimum resources required for the driver to
	 * operate and the maximum resources the driver is capable of
	 * using.
	 */
	sfc_vdpa_log_init(sva, "set drv limit");
	efx_nic_set_drv_limits(sva->nic, &limits);

	sfc_vdpa_log_init(sva, "init nic");
	rc = efx_nic_init(sva->nic);
	if (rc != 0) {
		sfc_vdpa_err(sva, "nic init failed: %s", rte_strerror(rc));
		goto fail_nic_init;
	}

	/* Find resource dimensions assigned by firmware to this function */
	rc = efx_nic_get_vi_pool(sva->nic, &evq_allocated, &rxq_allocated,
				 &txq_allocated);
	if (rc != 0) {
		sfc_vdpa_err(sva, "vi pool get failed: %s", rte_strerror(rc));
		goto fail_get_vi_pool;
	}

	/* The firmware may still allocate more than the maximum, so
	 * enforce the limits.
	 */
	evq_allocated = RTE_MIN(evq_allocated, limits.edl_max_evq_count);
	rxq_allocated = RTE_MIN(rxq_allocated, limits.edl_max_rxq_count);
	txq_allocated = RTE_MIN(txq_allocated, limits.edl_max_txq_count);

	max_queue_cnt = RTE_MIN(rxq_allocated, txq_allocated);
	/* Subtract management EVQ not used for traffic */
	max_queue_cnt = RTE_MIN(evq_allocated - 1, max_queue_cnt);

	SFC_VDPA_ASSERT(max_queue_cnt > 0);

	sva->max_queue_count = max_queue_cnt;
	sfc_vdpa_log_init(sva, "NIC init done with %u pair(s) of queues",
			  max_queue_cnt);

	return 0;

fail_get_vi_pool:
	efx_nic_fini(sva->nic);
fail_nic_init:
	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
	return rc;
}

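/*
 * Bring up the hardware: map the memory BAR, create and probe the NIC,
 * set up MCDI, reset the NIC, size the queue resources, and initialise
 * the virtio and filter subsystems. Failures unwind in reverse order.
 */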
int
sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva)
{
	efx_bar_region_t mem_ebr;
	efx_nic_t *enp;
	int rc;

	sfc_vdpa_log_init(sva, "entry");

	sfc_vdpa_log_init(sva, "get family");
	rc = sfc_efx_family(sva->pdev, &mem_ebr, &sva->family);
	if (rc != 0)
		goto fail_family;
	sfc_vdpa_log_init(sva,
			  "family is %u, membar is %d, "
			  "function control window offset is %#" PRIx64,
			  sva->family, mem_ebr.ebr_index, mem_ebr.ebr_offset);

	sfc_vdpa_log_init(sva, "init mem bar");
	rc = sfc_vdpa_mem_bar_init(sva, &mem_ebr);
	if (rc != 0)
		goto fail_mem_bar_init;

	sfc_vdpa_log_init(sva, "create nic");
	rte_spinlock_init(&sva->nic_lock);
	rc = efx_nic_create(sva->family, (efsys_identifier_t *)sva,
			    &sva->mem_bar, mem_ebr.ebr_offset,
			    &sva->nic_lock, &enp);
	if (rc != 0) {
		sfc_vdpa_err(sva, "nic create failed: %s", rte_strerror(rc));
		goto fail_nic_create;
	}
	sva->nic = enp;

	sfc_vdpa_log_init(sva, "init mcdi");
	rc = sfc_vdpa_mcdi_init(sva);
	if (rc != 0) {
		sfc_vdpa_err(sva, "mcdi init failed: %s", rte_strerror(rc));
		goto fail_mcdi_init;
	}

	sfc_vdpa_log_init(sva, "probe nic");
	rc = sfc_vdpa_nic_probe(sva);
	if (rc != 0)
		goto fail_nic_probe;

	sfc_vdpa_log_init(sva, "reset nic");
	rc = efx_nic_reset(enp);
	if (rc != 0) {
		sfc_vdpa_err(sva, "nic reset failed: %s", rte_strerror(rc));
		goto fail_nic_reset;
	}

	sfc_vdpa_log_init(sva, "estimate resource limits");
	rc = sfc_vdpa_estimate_resource_limits(sva);
	if (rc != 0)
		goto fail_estimate_rsrc_limits;

	sfc_vdpa_log_init(sva, "init virtio");
	rc = efx_virtio_init(enp);
	if (rc != 0) {
		sfc_vdpa_err(sva, "virtio init failed: %s", rte_strerror(rc));
		goto fail_virtio_init;
	}

	sfc_vdpa_log_init(sva, "init filter");
	rc = efx_filter_init(enp);
	if (rc != 0) {
		sfc_vdpa_err(sva, "filter init failed: %s", rte_strerror(rc));
		goto fail_filter_init;
	}

	sfc_vdpa_log_init(sva, "done");

	return 0;

fail_filter_init:
	efx_virtio_fini(enp);

fail_virtio_init:
	efx_nic_fini(enp);

fail_estimate_rsrc_limits:
fail_nic_reset:
	efx_nic_unprobe(enp);

fail_nic_probe:
	sfc_vdpa_mcdi_fini(sva);

fail_mcdi_init:
	sfc_vdpa_log_init(sva, "destroy nic");
	sva->nic = NULL;
	efx_nic_destroy(enp);

fail_nic_create:
	sfc_vdpa_mem_bar_fini(sva);

fail_mem_bar_init:
fail_family:
	sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
	return rc;
}

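/* Tear down the NIC state set up by sfc_vdpa_hw_init() in reverse order */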
void
sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva)
{
	efx_nic_t *enp = sva->nic;

	sfc_vdpa_log_init(sva, "entry");

	sfc_vdpa_log_init(sva, "virtio fini");
	efx_virtio_fini(enp);

	sfc_vdpa_log_init(sva, "unprobe nic");
	efx_nic_unprobe(enp);

	sfc_vdpa_log_init(sva, "mcdi fini");
	sfc_vdpa_mcdi_fini(sva);

	sfc_vdpa_log_init(sva, "nic fini");
	efx_nic_fini(enp);

	sfc_vdpa_log_init(sva, "destroy nic");
	sva->nic = NULL;
	efx_nic_destroy(enp);

	sfc_vdpa_mem_bar_fini(sva);
}