/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
 * All rights reserved.
 */

#include "accel_dsa.h"

#include "spdk/stdinc.h"

#include "spdk/accel_module.h"
#include "spdk/log.h"
#include "spdk_internal/idxd.h"

#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/idxd.h"
#include "spdk/util.h"
#include "spdk/json.h"
#include "spdk/trace.h"
#include "spdk_internal/trace_defs.h"

static bool g_dsa_enable = false;
static bool g_kernel_mode = false;

enum channel_state {
	IDXD_CHANNEL_ACTIVE,
	IDXD_CHANNEL_ERROR,
};

static bool g_dsa_initialized = false;

struct idxd_device {
	struct spdk_idxd_device *dsa;
	TAILQ_ENTRY(idxd_device) tailq;
};
static TAILQ_HEAD(, idxd_device) g_dsa_devices = TAILQ_HEAD_INITIALIZER(g_dsa_devices);
static struct idxd_device *g_next_dev = NULL;
static uint32_t g_num_devices = 0;
static pthread_mutex_t g_dev_lock = PTHREAD_MUTEX_INITIALIZER;

struct idxd_task {
	struct spdk_accel_task task;
	struct idxd_io_channel *chan;
};

struct idxd_io_channel {
	struct spdk_idxd_io_channel *chan;
	struct idxd_device *dev;
	enum channel_state state;
	struct spdk_poller *poller;
	uint32_t num_outstanding;
	TAILQ_HEAD(, spdk_accel_task) queued_tasks;
};

static struct spdk_io_channel *dsa_get_io_channel(void);

static struct idxd_device *
idxd_select_device(struct idxd_io_channel *chan)
{
	uint32_t count = 0;
	struct idxd_device *dev;
	uint32_t socket_id = spdk_env_get_socket_id(spdk_env_get_current_core());

	/*
	 * We allow channels to share underlying devices; selection is
	 * round-robin based, with a limit on how many channels can share
	 * one device.
	 */
	do {
		/* select next device */
		pthread_mutex_lock(&g_dev_lock);
		g_next_dev = TAILQ_NEXT(g_next_dev, tailq);
		if (g_next_dev == NULL) {
			g_next_dev = TAILQ_FIRST(&g_dsa_devices);
		}
		dev = g_next_dev;
		pthread_mutex_unlock(&g_dev_lock);

		if (socket_id != spdk_idxd_get_socket(dev->dsa)) {
			continue;
		}

		/*
		 * Now see if a channel is available on this one. We only
		 * allow a specific number of channels to share a device
		 * to limit outstanding IO for flow control purposes.
		 */
		chan->chan = spdk_idxd_get_channel(dev->dsa);
		if (chan->chan != NULL) {
			SPDK_DEBUGLOG(accel_dsa, "On socket %d using device on socket %d\n",
				      socket_id, spdk_idxd_get_socket(dev->dsa));
			return dev;
		}
	} while (count++ < g_num_devices);

	/* We are out of available channels and/or devices for the local socket. We fix the number
	 * of channels that we allocate per device and only allocate devices on the same socket
	 * that the current thread is on. On a 2-socket system it may be possible to avoid
	 * this situation by spreading threads across both sockets.
	 */
	SPDK_ERRLOG("No more DSA devices available on the local socket.\n");
	return NULL;
}
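/*
 * Completion callback handed to the spdk_idxd_submit_*() calls below. It is
 * invoked from spdk_idxd_process_events() on the channel's thread once the
 * descriptor completes: it decrements the channel's outstanding count, records
 * a trace event and completes the accel task with the status reported by the
 * device.
 */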
static void
dsa_done(void *cb_arg, int status)
{
	struct idxd_task *idxd_task = cb_arg;
	struct idxd_io_channel *chan;

	chan = idxd_task->chan;

	assert(chan->num_outstanding > 0);
	spdk_trace_record(TRACE_ACCEL_DSA_OP_COMPLETE, 0, 0, 0, chan->num_outstanding - 1);
	chan->num_outstanding--;

	spdk_accel_task_complete(&idxd_task->task, status);
}

static int
idxd_submit_dualcast(struct idxd_io_channel *ch, struct idxd_task *idxd_task, int flags)
{
	struct spdk_accel_task *task = &idxd_task->task;

	if (spdk_unlikely(task->d.iovcnt != 1 || task->d2.iovcnt != 1 || task->s.iovcnt != 1)) {
		return -EINVAL;
	}

	if (spdk_unlikely(task->d.iovs[0].iov_len != task->s.iovs[0].iov_len ||
			  task->d.iovs[0].iov_len != task->d2.iovs[0].iov_len)) {
		return -EINVAL;
	}

	return spdk_idxd_submit_dualcast(ch->chan, task->d.iovs[0].iov_base,
					 task->d2.iovs[0].iov_base, task->s.iovs[0].iov_base,
					 task->d.iovs[0].iov_len, flags, dsa_done, idxd_task);
}

static int
_process_single_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
{
	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
	struct idxd_task *idxd_task;
	int rc = 0, flags = 0;

	idxd_task = SPDK_CONTAINEROF(task, struct idxd_task, task);
	idxd_task->chan = chan;

	switch (task->op_code) {
	case ACCEL_OPC_COPY:
		rc = spdk_idxd_submit_copy(chan->chan, task->d.iovs, task->d.iovcnt,
					   task->s.iovs, task->s.iovcnt, flags, dsa_done, idxd_task);
		break;
	case ACCEL_OPC_DUALCAST:
		rc = idxd_submit_dualcast(chan, idxd_task, flags);
		break;
	case ACCEL_OPC_COMPARE:
		rc = spdk_idxd_submit_compare(chan->chan, task->s.iovs, task->s.iovcnt,
					      task->s2.iovs, task->s2.iovcnt, flags,
					      dsa_done, idxd_task);
		break;
	case ACCEL_OPC_FILL:
		rc = spdk_idxd_submit_fill(chan->chan, task->d.iovs, task->d.iovcnt,
					   task->fill_pattern, flags, dsa_done, idxd_task);
		break;
	case ACCEL_OPC_CRC32C:
		rc = spdk_idxd_submit_crc32c(chan->chan, task->s.iovs, task->s.iovcnt, task->seed,
					     task->crc_dst, flags, dsa_done, idxd_task);
		break;
	case ACCEL_OPC_COPY_CRC32C:
		rc = spdk_idxd_submit_copy_crc32c(chan->chan, task->d.iovs, task->d.iovcnt,
						  task->s.iovs, task->s.iovcnt,
						  task->seed, task->crc_dst, flags,
						  dsa_done, idxd_task);
		break;
	default:
		assert(false);
		rc = -EINVAL;
		break;
	}

	if (rc == 0) {
		chan->num_outstanding++;
		spdk_trace_record(TRACE_ACCEL_DSA_OP_SUBMIT, 0, 0, 0, chan->num_outstanding);
	}

	return rc;
}
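/*
 * submit_tasks hook for the accel framework. 'first_task' is either a single
 * task or the head of a chain linked through the tasks' 'link' members. Tasks
 * that cannot be submitted yet (the hardware returns -EBUSY, or earlier tasks
 * are already queued) are parked on queued_tasks and retried from idxd_poll().
 */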
static int
dsa_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *first_task)
{
	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
	struct spdk_accel_task *task, *tmp;
	int rc = 0;

	task = first_task;

	if (chan->state == IDXD_CHANNEL_ERROR) {
		while (task) {
			tmp = TAILQ_NEXT(task, link);
			spdk_accel_task_complete(task, -EINVAL);
			task = tmp;
		}
		return 0;
	}

	if (!TAILQ_EMPTY(&chan->queued_tasks)) {
		goto queue_tasks;
	}

	/* The caller will either submit a single task or a chain of tasks linked
	 * together, but the tasks must not be on a list. For example, see idxd_poll(),
	 * where the list of queued tasks is re-initialized after saving off its first
	 * task, which is then passed in here. The accel framework does the same.
	 */
	while (task) {
		tmp = TAILQ_NEXT(task, link);
		rc = _process_single_task(ch, task);

		if (rc == -EBUSY) {
			goto queue_tasks;
		} else if (rc) {
			spdk_accel_task_complete(task, rc);
		}
		task = tmp;
	}

	return 0;

queue_tasks:
	while (task != NULL) {
		tmp = TAILQ_NEXT(task, link);
		TAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
		task = tmp;
	}
	return 0;
}

static int
idxd_poll(void *arg)
{
	struct idxd_io_channel *chan = arg;
	struct spdk_accel_task *task = NULL;
	struct idxd_task *idxd_task;
	int count;

	count = spdk_idxd_process_events(chan->chan);

	/* Check if there are any pending ops to process if the channel is active */
	if (chan->state == IDXD_CHANNEL_ACTIVE) {
		/* Submit queued tasks */
		if (!TAILQ_EMPTY(&chan->queued_tasks)) {
			task = TAILQ_FIRST(&chan->queued_tasks);
			idxd_task = SPDK_CONTAINEROF(task, struct idxd_task, task);

			TAILQ_INIT(&chan->queued_tasks);

			dsa_submit_tasks(spdk_io_channel_from_ctx(idxd_task->chan), task);
		}
	}

	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static size_t
accel_dsa_get_ctx_size(void)
{
	return sizeof(struct idxd_task);
}

static bool
dsa_supports_opcode(enum accel_opcode opc)
{
	if (!g_dsa_initialized) {
		assert(0);
		return false;
	}

	switch (opc) {
	case ACCEL_OPC_COPY:
	case ACCEL_OPC_FILL:
	case ACCEL_OPC_DUALCAST:
	case ACCEL_OPC_COMPARE:
	case ACCEL_OPC_CRC32C:
	case ACCEL_OPC_COPY_CRC32C:
		return true;
	default:
		return false;
	}
}

static int accel_dsa_init(void);
static void accel_dsa_exit(void *ctx);
static void accel_dsa_write_config_json(struct spdk_json_write_ctx *w);

static struct spdk_accel_module_if g_dsa_module = {
	.module_init = accel_dsa_init,
	.module_fini = accel_dsa_exit,
	.write_config_json = accel_dsa_write_config_json,
	.get_ctx_size = accel_dsa_get_ctx_size,
	.name = "dsa",
	.supports_opcode = dsa_supports_opcode,
	.get_io_channel = dsa_get_io_channel,
	.submit_tasks = dsa_submit_tasks
};

static int
dsa_create_cb(void *io_device, void *ctx_buf)
{
	struct idxd_io_channel *chan = ctx_buf;
	struct idxd_device *dsa;

	dsa = idxd_select_device(chan);
	if (dsa == NULL) {
		SPDK_ERRLOG("Failed to get an idxd channel\n");
		return -EINVAL;
	}

	chan->dev = dsa;
	chan->poller = SPDK_POLLER_REGISTER(idxd_poll, chan, 0);
	TAILQ_INIT(&chan->queued_tasks);
	chan->num_outstanding = 0;
	chan->state = IDXD_CHANNEL_ACTIVE;

	return 0;
}

static void
dsa_destroy_cb(void *io_device, void *ctx_buf)
{
	struct idxd_io_channel *chan = ctx_buf;

	spdk_poller_unregister(&chan->poller);
	spdk_idxd_put_channel(chan->chan);
}

static struct spdk_io_channel *
dsa_get_io_channel(void)
{
	return spdk_get_io_channel(&g_dsa_module);
}
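/*
 * Invoked by spdk_idxd_probe() for each device accepted by probe_cb. The
 * device is wrapped in an idxd_device and appended to the global list that
 * idxd_select_device() walks round-robin as io channels are created.
 */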
static void
attach_cb(void *cb_ctx, struct spdk_idxd_device *idxd)
{
	struct idxd_device *dev;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		SPDK_ERRLOG("Failed to allocate device struct\n");
		return;
	}

	dev->dsa = idxd;
	if (g_next_dev == NULL) {
		g_next_dev = dev;
	}

	TAILQ_INSERT_TAIL(&g_dsa_devices, dev, tailq);
	g_num_devices++;
}

void
accel_dsa_enable_probe(bool kernel_mode)
{
	g_kernel_mode = kernel_mode;
	g_dsa_enable = true;
	spdk_idxd_set_config(g_kernel_mode);
	spdk_accel_module_list_add(&g_dsa_module);
}

static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *dev)
{
	if (dev->id.device_id == PCI_DEVICE_ID_INTEL_DSA) {
		return true;
	}

	return false;
}

static int
accel_dsa_init(void)
{
	if (!g_dsa_enable) {
		return -EINVAL;
	}

	if (spdk_idxd_probe(NULL, attach_cb, probe_cb) != 0) {
		SPDK_ERRLOG("spdk_idxd_probe() failed\n");
		return -EINVAL;
	}

	if (TAILQ_EMPTY(&g_dsa_devices)) {
		SPDK_NOTICELOG("no available dsa devices\n");
		return -EINVAL;
	}

	g_dsa_initialized = true;
	spdk_io_device_register(&g_dsa_module, dsa_create_cb, dsa_destroy_cb,
				sizeof(struct idxd_io_channel), "dsa_accel_module");
	return 0;
}

static void
accel_dsa_exit(void *ctx)
{
	struct idxd_device *dev;

	if (g_dsa_initialized) {
		spdk_io_device_unregister(&g_dsa_module, NULL);
		g_dsa_initialized = false;
	}

	while (!TAILQ_EMPTY(&g_dsa_devices)) {
		dev = TAILQ_FIRST(&g_dsa_devices);
		TAILQ_REMOVE(&g_dsa_devices, dev, tailq);
		spdk_idxd_detach(dev->dsa);
		free(dev);
	}

	spdk_accel_module_finish();
}

static void
accel_dsa_write_config_json(struct spdk_json_write_ctx *w)
{
	if (g_dsa_enable) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "dsa_scan_accel_module");
		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_bool(w, "config_kernel_mode", g_kernel_mode);
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);
	}
}

SPDK_TRACE_REGISTER_FN(dsa_trace, "dsa", TRACE_GROUP_ACCEL_DSA)
{
	spdk_trace_register_description("DSA_OP_SUBMIT", TRACE_ACCEL_DSA_OP_SUBMIT,
					OWNER_NONE, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "count");
	spdk_trace_register_description("DSA_OP_COMPLETE", TRACE_ACCEL_DSA_OP_COMPLETE,
					OWNER_NONE, OBJECT_NONE, 0,
					SPDK_TRACE_ARG_TYPE_INT, "count");
}

SPDK_LOG_REGISTER_COMPONENT(accel_dsa)
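/*
 * For reference, a sketch of the configuration entry that
 * accel_dsa_write_config_json() above emits when the module is enabled
 * (illustrative only; the surrounding structure is owned by the accel
 * framework's config writer, and "false" is simply the default for
 * g_kernel_mode):
 *
 *   {
 *     "method": "dsa_scan_accel_module",
 *     "params": {
 *       "config_kernel_mode": false
 *     }
 *   }
 */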