xref: /spdk/module/accel/dsa/accel_dsa.c (revision 877573897ad52be4fa8989f7617bd655b87e05c4)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   All rights reserved.
 */

#include "accel_dsa.h"

#include "spdk/stdinc.h"

#include "spdk_internal/accel_module.h"
#include "spdk/log.h"
#include "spdk_internal/idxd.h"

#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/thread.h"
#include "spdk/idxd.h"
#include "spdk/util.h"
#include "spdk/json.h"
#include "spdk/trace.h"
#include "spdk_internal/trace_defs.h"

static bool g_dsa_enable = false;
static bool g_kernel_mode = false;

enum channel_state {
	IDXD_CHANNEL_ACTIVE,
	IDXD_CHANNEL_ERROR,
};

static bool g_dsa_initialized = false;

struct idxd_device {
	struct spdk_idxd_device		*dsa;
	TAILQ_ENTRY(idxd_device)	tailq;
};
static TAILQ_HEAD(, idxd_device) g_dsa_devices = TAILQ_HEAD_INITIALIZER(g_dsa_devices);
static struct idxd_device *g_next_dev = NULL;
static uint32_t g_num_devices = 0;
static pthread_mutex_t g_dev_lock = PTHREAD_MUTEX_INITIALIZER;

struct idxd_task {
	struct spdk_accel_task	task;
	struct idxd_io_channel	*chan;
};

struct idxd_io_channel {
	struct spdk_idxd_io_channel	*chan;
	struct idxd_device		*dev;
	enum channel_state		state;
	struct spdk_poller		*poller;
	uint32_t			num_outstanding;
	TAILQ_HEAD(, spdk_accel_task)	queued_tasks;
};

static struct spdk_io_channel *dsa_get_io_channel(void);

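/*
 * Select a DSA device for the given channel. Devices are walked round-robin,
 * skipping devices on a different socket than the current thread, until a
 * device with a free idxd channel is found or every device has been tried.
 */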
static struct idxd_device *
idxd_select_device(struct idxd_io_channel *chan)
{
	uint32_t count = 0;
	struct idxd_device *dev;
	uint32_t socket_id = spdk_env_get_socket_id(spdk_env_get_current_core());

	/*
	 * We allow channels to share underlying devices. Selection is
	 * round-robin based, with a limit on how many channels can share
	 * one device.
	 */
	do {
		/* select next device */
		pthread_mutex_lock(&g_dev_lock);
		g_next_dev = TAILQ_NEXT(g_next_dev, tailq);
		if (g_next_dev == NULL) {
			g_next_dev = TAILQ_FIRST(&g_dsa_devices);
		}
		dev = g_next_dev;
		pthread_mutex_unlock(&g_dev_lock);

		if (socket_id != spdk_idxd_get_socket(dev->dsa)) {
			continue;
		}

		/*
		 * Now see if a channel is available on this one. We only
		 * allow a specific number of channels to share a device
		 * to limit outstanding IO for flow control purposes.
		 */
		chan->chan = spdk_idxd_get_channel(dev->dsa);
		if (chan->chan != NULL) {
			SPDK_DEBUGLOG(accel_dsa, "On socket %d using device on socket %d\n",
				      socket_id, spdk_idxd_get_socket(dev->dsa));
			return dev;
		}
	} while (count++ < g_num_devices);

	/* We are out of available channels and/or devices for the local socket. We fix the
	 * number of channels that we allocate per device and only allocate devices on the
	 * same socket as the current thread. On a 2-socket system it may be possible to
	 * avoid this situation by spreading threads across the sockets.
	 */
	SPDK_ERRLOG("No more DSA devices available on the local socket.\n");
	return NULL;
}

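/*
 * Completion callback for submitted idxd operations: record a trace event,
 * decrement the channel's outstanding count and complete the accel task with
 * the reported status.
 */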
static void
dsa_done(void *cb_arg, int status)
{
	struct idxd_task *idxd_task = cb_arg;
	struct idxd_io_channel *chan;

	chan = idxd_task->chan;

	assert(chan->num_outstanding > 0);
	spdk_trace_record(TRACE_ACCEL_DSA_OP_COMPLETE, 0, 0, 0, chan->num_outstanding - 1);
	chan->num_outstanding--;

	spdk_accel_task_complete(&idxd_task->task, status);
}

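/*
 * Translate a single accel task into the corresponding idxd submission
 * (copy, dualcast, compare, fill, crc32c or copy+crc32c), applying the
 * persistent/non-temporal flags when the task requests persistence.
 */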
static int
_process_single_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
{
	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
	struct idxd_task *idxd_task;
	int rc = 0;
	struct iovec *iov;
	uint32_t iovcnt;
	struct iovec siov = {};
	struct iovec diov = {};
	int flags = 0;

	idxd_task = SPDK_CONTAINEROF(task, struct idxd_task, task);
	idxd_task->chan = chan;

	switch (task->op_code) {
	case ACCEL_OPC_COPY:
		siov.iov_base = task->src;
		siov.iov_len = task->nbytes;
		diov.iov_base = task->dst;
		diov.iov_len = task->nbytes;
		if (task->flags & ACCEL_FLAG_PERSISTENT) {
			flags |= SPDK_IDXD_FLAG_PERSISTENT;
			flags |= SPDK_IDXD_FLAG_NONTEMPORAL;
		}
		rc = spdk_idxd_submit_copy(chan->chan, &diov, 1, &siov, 1, flags, dsa_done, idxd_task);
		break;
	case ACCEL_OPC_DUALCAST:
		if (task->flags & ACCEL_FLAG_PERSISTENT) {
			flags |= SPDK_IDXD_FLAG_PERSISTENT;
			flags |= SPDK_IDXD_FLAG_NONTEMPORAL;
		}
		rc = spdk_idxd_submit_dualcast(chan->chan, task->dst, task->dst2, task->src, task->nbytes,
					       flags, dsa_done, idxd_task);
		break;
	case ACCEL_OPC_COMPARE:
		siov.iov_base = task->src;
		siov.iov_len = task->nbytes;
		diov.iov_base = task->dst;
		diov.iov_len = task->nbytes;
		rc = spdk_idxd_submit_compare(chan->chan, &siov, 1, &diov, 1, flags, dsa_done, idxd_task);
		break;
	case ACCEL_OPC_FILL:
		diov.iov_base = task->dst;
		diov.iov_len = task->nbytes;
		if (task->flags & ACCEL_FLAG_PERSISTENT) {
			flags |= SPDK_IDXD_FLAG_PERSISTENT;
			flags |= SPDK_IDXD_FLAG_NONTEMPORAL;
		}
		rc = spdk_idxd_submit_fill(chan->chan, &diov, 1, task->fill_pattern, flags, dsa_done,
					   idxd_task);
		break;
	case ACCEL_OPC_CRC32C:
		if (task->s.iovcnt == 0) {
			siov.iov_base = task->src;
			siov.iov_len = task->nbytes;
			iov = &siov;
			iovcnt = 1;
		} else {
			iov = task->s.iovs;
			iovcnt = task->s.iovcnt;
		}
		rc = spdk_idxd_submit_crc32c(chan->chan, iov, iovcnt, task->seed, task->crc_dst,
					     flags, dsa_done, idxd_task);
		break;
	case ACCEL_OPC_COPY_CRC32C:
		if (task->s.iovcnt == 0) {
			siov.iov_base = task->src;
			siov.iov_len = task->nbytes;
			iov = &siov;
			iovcnt = 1;
		} else {
			iov = task->s.iovs;
			iovcnt = task->s.iovcnt;
		}
		diov.iov_base = task->dst;
		diov.iov_len = task->nbytes;
		if (task->flags & ACCEL_FLAG_PERSISTENT) {
			flags |= SPDK_IDXD_FLAG_PERSISTENT;
			flags |= SPDK_IDXD_FLAG_NONTEMPORAL;
		}
		rc = spdk_idxd_submit_copy_crc32c(chan->chan, &diov, 1, iov, iovcnt,
						  task->seed, task->crc_dst, flags,
						  dsa_done, idxd_task);
		break;
	default:
		assert(false);
		rc = -EINVAL;
		break;
	}

	if (rc == 0) {
		chan->num_outstanding++;
		spdk_trace_record(TRACE_ACCEL_DSA_OP_SUBMIT, 0, 0, 0, chan->num_outstanding);
	}

	return rc;
}

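/*
 * Submit a chain of linked accel tasks. Tasks are failed immediately if the
 * channel is in the error state, queued if earlier tasks are already queued
 * or if a submission returns -EBUSY, and otherwise submitted one by one.
 */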
static int
dsa_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *first_task)
{
	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
	struct spdk_accel_task *task, *tmp;
	int rc = 0;

	task = first_task;

	if (chan->state == IDXD_CHANNEL_ERROR) {
		while (task) {
			tmp = TAILQ_NEXT(task, link);
			spdk_accel_task_complete(task, -EINVAL);
			task = tmp;
		}
		return 0;
	}

	if (!TAILQ_EMPTY(&chan->queued_tasks)) {
		goto queue_tasks;
	}

	/* The caller submits either a single task or a group of tasks that are linked
	 * together, but they must not still be on a list. For example, in idxd_poll(),
	 * where queued tasks are resubmitted, the list they were on is re-initialized
	 * after saving off the first task, which is then passed in here. The accel
	 * framework does the same.
	 */
	while (task) {
		tmp = TAILQ_NEXT(task, link);
		rc = _process_single_task(ch, task);

		if (rc == -EBUSY) {
			goto queue_tasks;
		} else if (rc) {
			spdk_accel_task_complete(task, rc);
		}
		task = tmp;
	}

	return 0;

queue_tasks:
	while (task != NULL) {
		tmp = TAILQ_NEXT(task, link);
		TAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
		task = tmp;
	}
	return 0;
}

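/* Poller: process idxd completions and resubmit any queued tasks while the channel is active. */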
static int
idxd_poll(void *arg)
{
	struct idxd_io_channel *chan = arg;
	struct spdk_accel_task *task = NULL;
	struct idxd_task *idxd_task;
	int count;

	count = spdk_idxd_process_events(chan->chan);

	/* Check if there are any pending ops to process if the channel is active */
	if (chan->state == IDXD_CHANNEL_ACTIVE) {
		/* Submit queued tasks */
		if (!TAILQ_EMPTY(&chan->queued_tasks)) {
			task = TAILQ_FIRST(&chan->queued_tasks);
			idxd_task = SPDK_CONTAINEROF(task, struct idxd_task, task);

			TAILQ_INIT(&chan->queued_tasks);

			dsa_submit_tasks(spdk_io_channel_from_ctx(idxd_task->chan), task);
		}
	}

	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static size_t
accel_dsa_get_ctx_size(void)
{
	return sizeof(struct idxd_task);
}

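/* Report which accel opcodes this module can handle once DSA has been initialized. */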
static bool
dsa_supports_opcode(enum accel_opcode opc)
{
	if (!g_dsa_initialized) {
		return false;
	}

	switch (opc) {
	case ACCEL_OPC_COPY:
	case ACCEL_OPC_FILL:
	case ACCEL_OPC_DUALCAST:
	case ACCEL_OPC_COMPARE:
	case ACCEL_OPC_CRC32C:
	case ACCEL_OPC_COPY_CRC32C:
		return true;
	default:
		return false;
	}
}

static int accel_dsa_init(void);
static void accel_dsa_exit(void *ctx);
static void accel_dsa_write_config_json(struct spdk_json_write_ctx *w);

static struct spdk_accel_module_if g_dsa_module = {
	.module_init		= accel_dsa_init,
	.module_fini		= accel_dsa_exit,
	.write_config_json	= accel_dsa_write_config_json,
	.get_ctx_size		= accel_dsa_get_ctx_size,
	.name			= "dsa",
	.supports_opcode	= dsa_supports_opcode,
	.get_io_channel		= dsa_get_io_channel,
	.submit_tasks		= dsa_submit_tasks
};

SPDK_ACCEL_MODULE_REGISTER(dsa, &g_dsa_module)

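/*
 * I/O channel create callback: bind the channel to a DSA device, register the
 * completion poller and initialize the queued-task list.
 */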
static int
dsa_create_cb(void *io_device, void *ctx_buf)
{
	struct idxd_io_channel *chan = ctx_buf;
	struct idxd_device *dsa;

	dsa = idxd_select_device(chan);
	if (dsa == NULL) {
		SPDK_ERRLOG("Failed to get an idxd channel\n");
		return -EINVAL;
	}

	chan->dev = dsa;
	chan->poller = SPDK_POLLER_REGISTER(idxd_poll, chan, 0);
	TAILQ_INIT(&chan->queued_tasks);
	chan->num_outstanding = 0;
	chan->state = IDXD_CHANNEL_ACTIVE;

	return 0;
}

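/* I/O channel destroy callback: unregister the poller and return the idxd channel. */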
static void
dsa_destroy_cb(void *io_device, void *ctx_buf)
{
	struct idxd_io_channel *chan = ctx_buf;

	spdk_poller_unregister(&chan->poller);
	spdk_idxd_put_channel(chan->chan);
}

static struct spdk_io_channel *
dsa_get_io_channel(void)
{
	return spdk_get_io_channel(&g_dsa_module);
}

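/* Called by spdk_idxd_probe() for each attached DSA device: add it to the global device list. */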
static void
attach_cb(void *cb_ctx, struct spdk_idxd_device *idxd)
{
	struct idxd_device *dev;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		SPDK_ERRLOG("Failed to allocate device struct\n");
		return;
	}

	dev->dsa = idxd;
	if (g_next_dev == NULL) {
		g_next_dev = dev;
	}

	TAILQ_INSERT_TAIL(&g_dsa_devices, dev, tailq);
	g_num_devices++;
}

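/*
 * Enable the DSA module and select user-space or kernel idxd mode before
 * module init (e.g. via the dsa_scan_accel_module RPC).
 */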
void
accel_dsa_enable_probe(bool kernel_mode)
{
	g_kernel_mode = kernel_mode;
	g_dsa_enable = true;
	spdk_idxd_set_config(g_kernel_mode);
}

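/* PCI probe filter: only claim devices with the Intel DSA device ID. */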
static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *dev)
{
	if (dev->id.device_id == PCI_DEVICE_ID_INTEL_DSA) {
		return true;
	}

	return false;
}

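/*
 * Module init: probe for DSA devices and register the io_device that provides
 * per-thread channels. Fails if the module was not enabled or no devices were found.
 */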
static int
accel_dsa_init(void)
{
	if (!g_dsa_enable) {
		return -EINVAL;
	}

	if (spdk_idxd_probe(NULL, attach_cb, probe_cb) != 0) {
		SPDK_ERRLOG("spdk_idxd_probe() failed\n");
		return -EINVAL;
	}

	if (TAILQ_EMPTY(&g_dsa_devices)) {
		SPDK_NOTICELOG("no available dsa devices\n");
		return -EINVAL;
	}

	g_dsa_initialized = true;
	SPDK_NOTICELOG("Accel framework DSA module initialized.\n");
	spdk_io_device_register(&g_dsa_module, dsa_create_cb, dsa_destroy_cb,
				sizeof(struct idxd_io_channel), "dsa_accel_module");
	return 0;
}

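/* Module teardown: unregister the io_device, then detach and free all DSA devices. */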
static void
accel_dsa_exit(void *ctx)
{
	struct idxd_device *dev;

	if (g_dsa_initialized) {
		spdk_io_device_unregister(&g_dsa_module, NULL);
		g_dsa_initialized = false;
	}

	while (!TAILQ_EMPTY(&g_dsa_devices)) {
		dev = TAILQ_FIRST(&g_dsa_devices);
		TAILQ_REMOVE(&g_dsa_devices, dev, tailq);
		spdk_idxd_detach(dev->dsa);
		free(dev);
	}

	spdk_accel_module_finish();
}

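/* Emit the dsa_scan_accel_module RPC into the JSON config when the module is enabled. */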
static void
accel_dsa_write_config_json(struct spdk_json_write_ctx *w)
{
	if (g_dsa_enable) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "dsa_scan_accel_module");
		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_bool(w, "config_kernel_mode", g_kernel_mode);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}
}

SPDK_TRACE_REGISTER_FN(dsa_trace, "dsa", TRACE_GROUP_ACCEL_DSA)
{
	spdk_trace_register_description("DSA_OP_SUBMIT", TRACE_ACCEL_DSA_OP_SUBMIT, OWNER_NONE,
					OBJECT_NONE, 0, SPDK_TRACE_ARG_TYPE_INT, "count");
	spdk_trace_register_description("DSA_OP_COMPLETE", TRACE_ACCEL_DSA_OP_COMPLETE, OWNER_NONE,
					OBJECT_NONE, 0, SPDK_TRACE_ARG_TYPE_INT, "count");
}

SPDK_LOG_REGISTER_COMPONENT(accel_dsa)