/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
 *   All rights reserved.
 */

#include "accel_dsa.h"

#include "spdk/stdinc.h"

#include "spdk/accel_module.h"
#include "spdk/log.h"
#include "spdk_internal/idxd.h"

#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/idxd.h"
#include "spdk/util.h"
#include "spdk/json.h"
#include "spdk/trace.h"
#include "spdk_internal/trace_defs.h"

static bool g_dsa_enable = false;
static bool g_kernel_mode = false;

enum channel_state {
	IDXD_CHANNEL_ACTIVE,
	IDXD_CHANNEL_ERROR,
};

static bool g_dsa_initialized = false;

struct idxd_device {
	struct				spdk_idxd_device *dsa;
	TAILQ_ENTRY(idxd_device)	tailq;
};
static TAILQ_HEAD(, idxd_device) g_dsa_devices = TAILQ_HEAD_INITIALIZER(g_dsa_devices);
static struct idxd_device *g_next_dev = NULL;
static uint32_t g_num_devices = 0;
static pthread_mutex_t g_dev_lock = PTHREAD_MUTEX_INITIALIZER;

struct idxd_task {
	struct spdk_accel_task	task;
	struct idxd_io_channel	*chan;
};

struct idxd_io_channel {
	struct spdk_idxd_io_channel	*chan;
	struct idxd_device		*dev;
	enum channel_state		state;
	struct spdk_poller		*poller;
	uint32_t			num_outstanding;
	STAILQ_HEAD(, spdk_accel_task)	queued_tasks;
};

static struct spdk_io_channel *dsa_get_io_channel(void);

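/* Pick a DSA device for this channel. Devices are walked round-robin under
 * g_dev_lock, only devices on the caller's NUMA node are considered, and the
 * idxd library limits how many channels may share a device. Returns NULL if
 * no local device has a channel available.
 */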
static struct idxd_device *
idxd_select_device(struct idxd_io_channel *chan)
{
	uint32_t count = 0;
	struct idxd_device *dev;
	uint32_t numa_id = spdk_env_get_numa_id(spdk_env_get_current_core());

	/*
	 * We allow channels to share underlying devices. Selection is
	 * round-robin based, with a limit on how many channels can share
	 * one device.
	 */
	do {
		/* select next device */
		pthread_mutex_lock(&g_dev_lock);
		g_next_dev = TAILQ_NEXT(g_next_dev, tailq);
		if (g_next_dev == NULL) {
			g_next_dev = TAILQ_FIRST(&g_dsa_devices);
		}
		dev = g_next_dev;
		pthread_mutex_unlock(&g_dev_lock);

		if (numa_id != spdk_idxd_get_socket(dev->dsa)) {
			continue;
		}

		/*
		 * Now see if a channel is available on this one. We only
		 * allow a specific number of channels to share a device
		 * to limit outstanding IO for flow control purposes.
		 */
		chan->chan = spdk_idxd_get_channel(dev->dsa);
		if (chan->chan != NULL) {
			SPDK_DEBUGLOG(accel_dsa, "On socket %d using device on numa %d\n",
				      numa_id, spdk_idxd_get_socket(dev->dsa));
			return dev;
		}
	} while (++count < g_num_devices);

	/* We are out of available channels and/or devices for the local socket. We fix the number
	 * of channels that we allocate per device and only allocate devices on the same socket
	 * as the current thread. On a two-socket system it may be possible to avoid this
	 * situation by spreading threads across the sockets.
	 */
	SPDK_ERRLOG("No more DSA devices available on the local socket.\n");
	return NULL;
}

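/* Completion callback invoked by the idxd library for every DSA operation.
 * For failed DIF verify operations it re-runs the software DIF verify to
 * recover detailed error information before completing the accel task.
 */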
static void
dsa_done(void *cb_arg, int status)
{
	struct idxd_task *idxd_task = cb_arg;
	struct idxd_io_channel *chan;
	int rc;

	chan = idxd_task->chan;

	/* If the DSA DIF Check operation detects an error, detailed info about
	 * this error (like actual/expected values) needs to be obtained by
	 * calling the software DIF Verify operation.
	 */
	if (spdk_unlikely(status == -EIO)) {
		if (idxd_task->task.op_code == SPDK_ACCEL_OPC_DIF_VERIFY ||
		    idxd_task->task.op_code == SPDK_ACCEL_OPC_DIF_VERIFY_COPY) {
			rc = spdk_dif_verify(idxd_task->task.s.iovs, idxd_task->task.s.iovcnt,
					     idxd_task->task.dif.num_blocks,
					     idxd_task->task.dif.ctx, idxd_task->task.dif.err);
			if (rc != 0) {
				SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
					    idxd_task->task.dif.err->err_type,
					    idxd_task->task.dif.err->err_offset);
			}
		}
	}

	assert(chan->num_outstanding > 0);
	spdk_trace_record(TRACE_ACCEL_DSA_OP_COMPLETE, 0, 0, 0, chan->num_outstanding - 1);
	chan->num_outstanding--;

	spdk_accel_task_complete(&idxd_task->task, status);
}

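/* Dualcast writes one source buffer to two destinations. The hardware path
 * requires a single contiguous iovec per buffer and equal lengths for all
 * three, so anything else is rejected with -EINVAL.
 */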
static int
idxd_submit_dualcast(struct idxd_io_channel *ch, struct idxd_task *idxd_task, int flags)
{
	struct spdk_accel_task *task = &idxd_task->task;

	if (spdk_unlikely(task->d.iovcnt != 1 || task->d2.iovcnt != 1 || task->s.iovcnt != 1)) {
		return -EINVAL;
	}

	if (spdk_unlikely(task->d.iovs[0].iov_len != task->s.iovs[0].iov_len ||
			  task->d.iovs[0].iov_len != task->d2.iovs[0].iov_len)) {
		return -EINVAL;
	}

	return spdk_idxd_submit_dualcast(ch->chan, task->d.iovs[0].iov_base,
					 task->d2.iovs[0].iov_base, task->s.iovs[0].iov_base,
					 task->d.iovs[0].iov_len, flags, dsa_done, idxd_task);
}

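/* DSA's overlap detection uses the descriptor transfer size (the source
 * length) for both buffers, so a destination segment that ends inside the
 * matching source segment would be falsely flagged as overlapping. Return
 * -EFAULT in that case so the caller can fall back to the software path.
 */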
static int
check_dsa_dif_strip_overlap_bufs(struct spdk_accel_task *task)
{
	uint64_t src_seg_addr_end_ext;
	uint64_t dst_seg_addr_end_ext;
	size_t i;

	/* The number of source and destination iovecs must be the same.
	 * If so, one of them can be used to iterate over both vectors
	 * later in the loop. */
	if (task->d.iovcnt != task->s.iovcnt) {
		SPDK_ERRLOG("Mismatched iovcnts: src=%d, dst=%d\n",
			    task->s.iovcnt, task->d.iovcnt);
		return -EINVAL;
	}

	for (i = 0; i < task->s.iovcnt; i++) {
		src_seg_addr_end_ext = (uint64_t)task->s.iovs[i].iov_base +
				       task->s.iovs[i].iov_len;

		dst_seg_addr_end_ext = (uint64_t)task->d.iovs[i].iov_base +
				       task->s.iovs[i].iov_len;

		if ((dst_seg_addr_end_ext >= (uint64_t)task->s.iovs[i].iov_base) &&
		    (dst_seg_addr_end_ext <= src_seg_addr_end_ext)) {
			return -EFAULT;
		}
	}

	return 0;
}

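/* Thread message handler used by the software fallback path to complete a
 * task outside of the submission call stack.
 */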
static void
spdk_accel_sw_task_complete(void *ctx)
{
	struct spdk_accel_task *task = (struct spdk_accel_task *)ctx;

	spdk_accel_task_complete(task, task->status);
}

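/* Map a generic accel task to the matching idxd submission. Returns 0 when
 * the descriptor was submitted (completion arrives via dsa_done), -EBUSY if
 * the submission could not be accepted right now, or another negative errno
 * on error.
 */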
static int
_process_single_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
{
	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
	struct idxd_task *idxd_task;
	int rc = 0, flags = 0;

	idxd_task = SPDK_CONTAINEROF(task, struct idxd_task, task);
	idxd_task->chan = chan;

	switch (task->op_code) {
	case SPDK_ACCEL_OPC_COPY:
		rc = spdk_idxd_submit_copy(chan->chan, task->d.iovs, task->d.iovcnt,
					   task->s.iovs, task->s.iovcnt, flags, dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_DUALCAST:
		rc = idxd_submit_dualcast(chan, idxd_task, flags);
		break;
	case SPDK_ACCEL_OPC_COMPARE:
		rc = spdk_idxd_submit_compare(chan->chan, task->s.iovs, task->s.iovcnt,
					      task->s2.iovs, task->s2.iovcnt, flags,
					      dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_FILL:
		rc = spdk_idxd_submit_fill(chan->chan, task->d.iovs, task->d.iovcnt,
					   task->fill_pattern, flags, dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_CRC32C:
		rc = spdk_idxd_submit_crc32c(chan->chan, task->s.iovs, task->s.iovcnt, task->seed,
					     task->crc_dst, flags, dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_COPY_CRC32C:
		rc = spdk_idxd_submit_copy_crc32c(chan->chan, task->d.iovs, task->d.iovcnt,
						  task->s.iovs, task->s.iovcnt,
						  task->seed, task->crc_dst, flags,
						  dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_DIF_VERIFY:
		rc = spdk_idxd_submit_dif_check(chan->chan,
						task->s.iovs, task->s.iovcnt,
						task->dif.num_blocks, task->dif.ctx, flags,
						dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_DIF_GENERATE_COPY:
		rc = spdk_idxd_submit_dif_insert(chan->chan,
						 task->d.iovs, task->d.iovcnt,
						 task->s.iovs, task->s.iovcnt,
						 task->dif.num_blocks, task->dif.ctx, flags,
						 dsa_done, idxd_task);
		break;
	case SPDK_ACCEL_OPC_DIF_VERIFY_COPY:
		/* For DIF strip operations, DSA may incorrectly report an overlapping buffer
		 * error if the destination buffer immediately precedes the source buffer.
		 * This is because DSA uses the transfer size in the descriptor for both
		 * the source and destination buffers when checking for buffer overlap.
		 * Since the transfer size applies to the source buffer, which is larger
		 * than the destination buffer by the metadata size, it must not be used as
		 * the destination buffer size. To avoid false errors from DSA, the software
		 * checks whether such a condition can occur and, if so, falls back to the
		 * software DIF verify-copy path. */
		rc = check_dsa_dif_strip_overlap_bufs(task);
		if (rc == 0) {
			rc = spdk_idxd_submit_dif_strip(chan->chan,
							task->d.iovs, task->d.iovcnt,
							task->s.iovs, task->s.iovcnt,
							task->dif.num_blocks, task->dif.ctx, flags,
							dsa_done, idxd_task);
		} else if (rc == -EFAULT) {
			rc = spdk_dif_verify_copy(task->d.iovs,
						  task->d.iovcnt,
						  task->s.iovs,
						  task->s.iovcnt,
						  task->dif.num_blocks,
						  task->dif.ctx,
						  task->dif.err);
			idxd_task->task.status = rc;
			spdk_thread_send_msg(spdk_get_thread(), spdk_accel_sw_task_complete, (void *)&idxd_task->task);
			rc = 0;
		}
		break;
	default:
		assert(false);
		rc = -EINVAL;
		break;
	}

	if (rc == 0) {
		chan->num_outstanding++;
		spdk_trace_record(TRACE_ACCEL_DSA_OP_SUBMIT, 0, 0, 0, chan->num_outstanding);
	}

	return rc;
}

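/* Entry point for task submission from the accel framework. Tasks are queued
 * locally when the channel already has a backlog or when submission returns
 * -EBUSY; hard failures complete the task immediately with an error.
 */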
static int
dsa_submit_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
{
	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
	int rc = 0;

	assert(STAILQ_NEXT(task, link) == NULL);

	if (spdk_unlikely(chan->state == IDXD_CHANNEL_ERROR)) {
		spdk_accel_task_complete(task, -EINVAL);
		return 0;
	}

	if (!STAILQ_EMPTY(&chan->queued_tasks)) {
		STAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
		return 0;
	}

	rc = _process_single_task(ch, task);
	if (rc == -EBUSY) {
		STAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
	} else if (rc) {
		spdk_accel_task_complete(task, rc);
	}

	return 0;
}

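/* Drain the channel's backlog in order, stopping at the first -EBUSY. On an
 * errored channel all queued tasks are completed with -EINVAL instead.
 */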
static int
dsa_submit_queued_tasks(struct idxd_io_channel *chan)
{
	struct spdk_accel_task *task, *tmp;
	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(chan);
	int rc = 0;

	if (spdk_unlikely(chan->state == IDXD_CHANNEL_ERROR)) {
		/* Complete queued tasks with error and clear the list */
		while ((task = STAILQ_FIRST(&chan->queued_tasks))) {
			STAILQ_REMOVE_HEAD(&chan->queued_tasks, link);
			spdk_accel_task_complete(task, -EINVAL);
		}
		return 0;
	}

	STAILQ_FOREACH_SAFE(task, &chan->queued_tasks, link, tmp) {
		rc = _process_single_task(ch, task);
		if (rc == -EBUSY) {
			return rc;
		}
		STAILQ_REMOVE_HEAD(&chan->queued_tasks, link);
		if (rc) {
			spdk_accel_task_complete(task, rc);
		}
	}

	return 0;
}

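/* Per-channel poller: reap completions from the idxd library and retry any
 * tasks that were queued while the device was busy.
 */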
static int
idxd_poll(void *arg)
{
	struct idxd_io_channel *chan = arg;
	int count;

	count = spdk_idxd_process_events(chan->chan);

	/* Submit any queued ops; dsa_submit_queued_tasks() handles the error-channel case. */
	if (!STAILQ_EMPTY(&chan->queued_tasks)) {
		dsa_submit_queued_tasks(chan);
	}

	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static size_t
accel_dsa_get_ctx_size(void)
{
	return sizeof(struct idxd_task);
}

static bool
dsa_supports_opcode(enum spdk_accel_opcode opc)
{
	if (!g_dsa_initialized) {
		assert(0);
		return false;
	}

	switch (opc) {
	case SPDK_ACCEL_OPC_COPY:
	case SPDK_ACCEL_OPC_FILL:
	case SPDK_ACCEL_OPC_DUALCAST:
	case SPDK_ACCEL_OPC_COMPARE:
	case SPDK_ACCEL_OPC_CRC32C:
	case SPDK_ACCEL_OPC_COPY_CRC32C:
		return true;
	case SPDK_ACCEL_OPC_DIF_VERIFY:
	case SPDK_ACCEL_OPC_DIF_GENERATE_COPY:
	case SPDK_ACCEL_OPC_DIF_VERIFY_COPY:
		/* Supported only if the IOMMU is enabled */
		return spdk_iommu_is_enabled();
	default:
		return false;
	}
}

static int accel_dsa_init(void);
static void accel_dsa_exit(void *ctx);
static void accel_dsa_write_config_json(struct spdk_json_write_ctx *w);

static struct spdk_accel_module_if g_dsa_module = {
	.module_init		= accel_dsa_init,
	.module_fini		= accel_dsa_exit,
	.write_config_json	= accel_dsa_write_config_json,
	.get_ctx_size		= accel_dsa_get_ctx_size,
	.name			= "dsa",
	.supports_opcode	= dsa_supports_opcode,
	.get_io_channel		= dsa_get_io_channel,
	.submit_tasks		= dsa_submit_task
};

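/* I/O channel create callback: bind the channel to a local DSA device and
 * start its completion poller.
 */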
static int
dsa_create_cb(void *io_device, void *ctx_buf)
{
	struct idxd_io_channel *chan = ctx_buf;
	struct idxd_device *dsa;

	dsa = idxd_select_device(chan);
	if (dsa == NULL) {
		SPDK_ERRLOG("Failed to get an idxd channel\n");
		return -EINVAL;
	}

	chan->dev = dsa;
	chan->poller = SPDK_POLLER_REGISTER(idxd_poll, chan, 0);
	STAILQ_INIT(&chan->queued_tasks);
	chan->num_outstanding = 0;
	chan->state = IDXD_CHANNEL_ACTIVE;

	return 0;
}

static void
dsa_destroy_cb(void *io_device, void *ctx_buf)
{
	struct idxd_io_channel *chan = ctx_buf;

	spdk_poller_unregister(&chan->poller);
	spdk_idxd_put_channel(chan->chan);
}

static struct spdk_io_channel *
dsa_get_io_channel(void)
{
	return spdk_get_io_channel(&g_dsa_module);
}

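/* Called by spdk_idxd_probe() for each DSA device that is attached; track it
 * in the global device list used for round-robin channel assignment.
 */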
static void
attach_cb(void *cb_ctx, struct spdk_idxd_device *idxd)
{
	struct idxd_device *dev;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		SPDK_ERRLOG("Failed to allocate device struct\n");
		return;
	}

	dev->dsa = idxd;
	if (g_next_dev == NULL) {
		g_next_dev = dev;
	}

	TAILQ_INSERT_TAIL(&g_dsa_devices, dev, tailq);
	g_num_devices++;
}

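/* Enable the DSA accel module (typically via the dsa_scan_accel_module RPC).
 * This only records the configuration and registers the module; devices are
 * probed later in accel_dsa_init().
 */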
int
accel_dsa_enable_probe(bool kernel_mode)
{
	int rc;

	if (g_dsa_enable) {
		return -EALREADY;
	}

	rc = spdk_idxd_set_config(kernel_mode);
	if (rc != 0) {
		return rc;
	}

	spdk_accel_module_list_add(&g_dsa_module);
	g_kernel_mode = kernel_mode;
	g_dsa_enable = true;

	return 0;
}

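/* PCI probe filter: claim only Intel DSA devices. */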
static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *dev)
{
	if (dev->id.device_id == PCI_DEVICE_ID_INTEL_DSA) {
		return true;
	}

	return false;
}

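/* Module init: probe for DSA devices and register the io_device that backs
 * dsa_get_io_channel(). Fails with -ENODEV if no devices were found.
 */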
static int
accel_dsa_init(void)
{
	if (!g_dsa_enable) {
		return -EINVAL;
	}

	if (spdk_idxd_probe(NULL, attach_cb, probe_cb) != 0) {
		SPDK_ERRLOG("spdk_idxd_probe() failed\n");
		return -EINVAL;
	}

	if (TAILQ_EMPTY(&g_dsa_devices)) {
		return -ENODEV;
	}

	g_dsa_initialized = true;
	spdk_io_device_register(&g_dsa_module, dsa_create_cb, dsa_destroy_cb,
				sizeof(struct idxd_io_channel), "dsa_accel_module");
	return 0;
}

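/* Module teardown: unregister the io_device, detach and free all probed
 * devices, then tell the accel framework this module has finished.
 */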
static void
accel_dsa_exit(void *ctx)
{
	struct idxd_device *dev;

	if (g_dsa_initialized) {
		spdk_io_device_unregister(&g_dsa_module, NULL);
		g_dsa_initialized = false;
	}

	while (!TAILQ_EMPTY(&g_dsa_devices)) {
		dev = TAILQ_FIRST(&g_dsa_devices);
		TAILQ_REMOVE(&g_dsa_devices, dev, tailq);
		spdk_idxd_detach(dev->dsa);
		free(dev);
	}

	spdk_accel_module_finish();
}

static void
accel_dsa_write_config_json(struct spdk_json_write_ctx *w)
{
	if (g_dsa_enable) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "dsa_scan_accel_module");
		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_bool(w, "config_kernel_mode", g_kernel_mode);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}
}

SPDK_TRACE_REGISTER_FN(dsa_trace, "dsa", TRACE_GROUP_ACCEL_DSA)
{
	spdk_trace_register_description("DSA_OP_SUBMIT", TRACE_ACCEL_DSA_OP_SUBMIT, OWNER_TYPE_NONE,
					OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "count");
	spdk_trace_register_description("DSA_OP_COMPLETE", TRACE_ACCEL_DSA_OP_COMPLETE, OWNER_TYPE_NONE,
					OBJECT_NONE,
					0, SPDK_TRACE_ARG_TYPE_INT, "count");
}

SPDK_LOG_REGISTER_COMPONENT(accel_dsa)