/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include <rte_common.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_mldev.h>
#include <rte_mldev_pmd.h>
#include <rte_pci.h>

#include <eal_firmware.h>

#include <roc_api.h>

#include "cnxk_ml_dev.h"
#include "cnxk_ml_ops.h"

#define CN10K_ML_FW_PATH "fw_path"
#define CN10K_ML_FW_ENABLE_DPE_WARNINGS "enable_dpe_warnings"
#define CN10K_ML_FW_REPORT_DPE_WARNINGS "report_dpe_warnings"
#define CN10K_ML_DEV_CACHE_MODEL_DATA "cache_model_data"
#define CN10K_ML_OCM_ALLOC_MODE "ocm_alloc_mode"
#define CN10K_ML_DEV_HW_QUEUE_LOCK "hw_queue_lock"
#define CN10K_ML_OCM_PAGE_SIZE "ocm_page_size"

#define CN10K_ML_FW_PATH_DEFAULT "/lib/firmware/mlip-fw.bin"
#define CN10K_ML_FW_ENABLE_DPE_WARNINGS_DEFAULT 1
#define CN10K_ML_FW_REPORT_DPE_WARNINGS_DEFAULT 0
#define CN10K_ML_DEV_CACHE_MODEL_DATA_DEFAULT 1
#define CN10K_ML_OCM_ALLOC_MODE_DEFAULT "lowest"
#define CN10K_ML_DEV_HW_QUEUE_LOCK_DEFAULT 1
#define CN10K_ML_OCM_PAGE_SIZE_DEFAULT 16384

/* ML firmware macros */
#define FW_MEMZONE_NAME "ml_cn10k_fw_mz"
#define FW_STACK_BUFFER_SIZE 0x40000
#define FW_DEBUG_BUFFER_SIZE (2 * 0x20000)
#define FW_EXCEPTION_BUFFER_SIZE 0x400
#define FW_LINKER_OFFSET 0x80000
#define FW_WAIT_CYCLES 100

/* Firmware flags */
#define FW_ENABLE_DPE_WARNING_BITMASK BIT(0)
#define FW_REPORT_DPE_WARNING_BITMASK BIT(1)
#define FW_USE_DDR_POLL_ADDR_FP	      BIT(2)

static const char *const valid_args[] = {CN10K_ML_FW_PATH,
					 CN10K_ML_FW_ENABLE_DPE_WARNINGS,
					 CN10K_ML_FW_REPORT_DPE_WARNINGS,
					 CN10K_ML_DEV_CACHE_MODEL_DATA,
					 CN10K_ML_OCM_ALLOC_MODE,
					 CN10K_ML_DEV_HW_QUEUE_LOCK,
					 CN10K_ML_OCM_PAGE_SIZE,
					 NULL};

/* Supported OCM page sizes: 1KB, 2KB, 4KB, 8KB and 16KB */
static const int valid_ocm_page_size[] = {1024, 2048, 4096, 8192, 16384};

/* Error type database */
struct cn10k_ml_error_db ml_etype_db[] = {
	{ML_CN10K_ETYPE_NO_ERROR, "NO_ERROR"},	      {ML_CN10K_ETYPE_FW_NONFATAL, "FW_NON_FATAL"},
	{ML_CN10K_ETYPE_HW_NONFATAL, "HW_NON_FATAL"}, {ML_CN10K_ETYPE_HW_FATAL, "HW_FATAL"},
	{ML_CN10K_ETYPE_HW_WARNING, "HW_WARNING"},    {ML_CN10K_ETYPE_DRIVER, "DRIVER_ERROR"},
	{ML_CN10K_ETYPE_UNKNOWN, "UNKNOWN_ERROR"},
};

/* rte_kvargs handler: duplicate a string-valued devarg into *extra_args. */
static int
parse_string_arg(const char *key __rte_unused, const char *value, void *extra_args)
{
	if (value == NULL || extra_args == NULL)
		return -EINVAL;

	*(char **)extra_args = strdup(value);

	if (!*(char **)extra_args)
		return -ENOMEM;

	return 0;
}

/* rte_kvargs handler: parse a non-negative integer devarg into *extra_args. */
static int
parse_integer_arg(const char *key __rte_unused, const char *value, void *extra_args)
{
	int *i = (int *)extra_args;

	*i = atoi(value);
	if (*i < 0) {
		plt_err("Argument has to be non-negative.");
		return -EINVAL;
	}

	return 0;
}

static int
cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *cn10k_mldev)
{
	bool enable_dpe_warnings_set = false;
	bool report_dpe_warnings_set = false;
	bool cache_model_data_set = false;
	struct rte_kvargs *kvlist = NULL;
	bool ocm_alloc_mode_set = false;
	bool hw_queue_lock_set = false;
	bool ocm_page_size_set = false;
	char *ocm_alloc_mode = NULL;
	bool fw_path_set = false;
	char *fw_path = NULL;
	int ret = 0;
	bool found;
	uint8_t i;

	if (devargs == NULL)
		goto check_args;

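	/* Each key below is honored only when it appears exactly once in the
	 * devargs string (rte_kvargs_count() == 1); absent or duplicated keys
	 * fall back to the defaults applied under check_args. Illustrative
	 * devargs string (values are placeholders, keys match the
	 * RTE_PMD_REGISTER_PARAM_STRING list at the end of this file):
	 *   "fw_path=/lib/firmware/mlip-fw.bin,ocm_alloc_mode=lowest,ocm_page_size=4096"
	 */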
	kvlist = rte_kvargs_parse(devargs->args, valid_args);
	if (kvlist == NULL) {
		plt_err("Error parsing devargs");
		return -EINVAL;
	}

	if (rte_kvargs_count(kvlist, CN10K_ML_FW_PATH) == 1) {
		ret = rte_kvargs_process(kvlist, CN10K_ML_FW_PATH, &parse_string_arg, &fw_path);
		if (ret < 0) {
			plt_err("Error processing arguments, key = %s", CN10K_ML_FW_PATH);
			ret = -EINVAL;
			goto exit;
		}
		fw_path_set = true;
	}

	if (rte_kvargs_count(kvlist, CN10K_ML_FW_ENABLE_DPE_WARNINGS) == 1) {
		ret = rte_kvargs_process(kvlist, CN10K_ML_FW_ENABLE_DPE_WARNINGS,
					 &parse_integer_arg, &cn10k_mldev->fw.enable_dpe_warnings);
		if (ret < 0) {
			plt_err("Error processing arguments, key = %s",
				CN10K_ML_FW_ENABLE_DPE_WARNINGS);
			ret = -EINVAL;
			goto exit;
		}
		enable_dpe_warnings_set = true;
	}

	if (rte_kvargs_count(kvlist, CN10K_ML_FW_REPORT_DPE_WARNINGS) == 1) {
		ret = rte_kvargs_process(kvlist, CN10K_ML_FW_REPORT_DPE_WARNINGS,
					 &parse_integer_arg, &cn10k_mldev->fw.report_dpe_warnings);
		if (ret < 0) {
			plt_err("Error processing arguments, key = %s",
				CN10K_ML_FW_REPORT_DPE_WARNINGS);
			ret = -EINVAL;
			goto exit;
		}
		report_dpe_warnings_set = true;
	}

	if (rte_kvargs_count(kvlist, CN10K_ML_DEV_CACHE_MODEL_DATA) == 1) {
		ret = rte_kvargs_process(kvlist, CN10K_ML_DEV_CACHE_MODEL_DATA, &parse_integer_arg,
					 &cn10k_mldev->cache_model_data);
		if (ret < 0) {
			plt_err("Error processing arguments, key = %s",
				CN10K_ML_DEV_CACHE_MODEL_DATA);
			ret = -EINVAL;
			goto exit;
		}
		cache_model_data_set = true;
	}

	if (rte_kvargs_count(kvlist, CN10K_ML_OCM_ALLOC_MODE) == 1) {
		ret = rte_kvargs_process(kvlist, CN10K_ML_OCM_ALLOC_MODE, &parse_string_arg,
					 &ocm_alloc_mode);
		if (ret < 0) {
			plt_err("Error processing arguments, key = %s", CN10K_ML_OCM_ALLOC_MODE);
			ret = -EINVAL;
			goto exit;
		}
		ocm_alloc_mode_set = true;
	}

	if (rte_kvargs_count(kvlist, CN10K_ML_DEV_HW_QUEUE_LOCK) == 1) {
		ret = rte_kvargs_process(kvlist, CN10K_ML_DEV_HW_QUEUE_LOCK, &parse_integer_arg,
					 &cn10k_mldev->hw_queue_lock);
		if (ret < 0) {
			plt_err("Error processing arguments, key = %s",
				CN10K_ML_DEV_HW_QUEUE_LOCK);
			ret = -EINVAL;
			goto exit;
		}
		hw_queue_lock_set = true;
	}

	if (rte_kvargs_count(kvlist, CN10K_ML_OCM_PAGE_SIZE) == 1) {
		ret = rte_kvargs_process(kvlist, CN10K_ML_OCM_PAGE_SIZE, &parse_integer_arg,
					 &cn10k_mldev->ocm_page_size);
		if (ret < 0) {
			plt_err("Error processing arguments, key = %s", CN10K_ML_OCM_PAGE_SIZE);
			ret = -EINVAL;
			goto exit;
		}
		ocm_page_size_set = true;
	}

check_args:
	if (!fw_path_set)
		cn10k_mldev->fw.path = CN10K_ML_FW_PATH_DEFAULT;
	else
		cn10k_mldev->fw.path = fw_path;
	plt_info("ML: %s = %s", CN10K_ML_FW_PATH, cn10k_mldev->fw.path);

	if (!enable_dpe_warnings_set) {
		cn10k_mldev->fw.enable_dpe_warnings = CN10K_ML_FW_ENABLE_DPE_WARNINGS_DEFAULT;
	} else {
		if ((cn10k_mldev->fw.enable_dpe_warnings < 0) ||
		    (cn10k_mldev->fw.enable_dpe_warnings > 1)) {
			plt_err("Invalid argument, %s = %d", CN10K_ML_FW_ENABLE_DPE_WARNINGS,
				cn10k_mldev->fw.enable_dpe_warnings);
			ret = -EINVAL;
			goto exit;
		}
	}
	plt_info("ML: %s = %d", CN10K_ML_FW_ENABLE_DPE_WARNINGS,
		 cn10k_mldev->fw.enable_dpe_warnings);

	if (!report_dpe_warnings_set) {
		cn10k_mldev->fw.report_dpe_warnings = CN10K_ML_FW_REPORT_DPE_WARNINGS_DEFAULT;
	} else {
		if ((cn10k_mldev->fw.report_dpe_warnings < 0) ||
		    (cn10k_mldev->fw.report_dpe_warnings > 1)) {
			plt_err("Invalid argument, %s = %d", CN10K_ML_FW_REPORT_DPE_WARNINGS,
				cn10k_mldev->fw.report_dpe_warnings);
			ret = -EINVAL;
			goto exit;
		}
	}
	plt_info("ML: %s = %d", CN10K_ML_FW_REPORT_DPE_WARNINGS,
		 cn10k_mldev->fw.report_dpe_warnings);

	if (!cache_model_data_set) {
		cn10k_mldev->cache_model_data = CN10K_ML_DEV_CACHE_MODEL_DATA_DEFAULT;
	} else {
		if ((cn10k_mldev->cache_model_data < 0) || (cn10k_mldev->cache_model_data > 1)) {
			plt_err("Invalid argument, %s = %d", CN10K_ML_DEV_CACHE_MODEL_DATA,
				cn10k_mldev->cache_model_data);
			ret = -EINVAL;
			goto exit;
		}
	}
	plt_info("ML: %s = %d", CN10K_ML_DEV_CACHE_MODEL_DATA, cn10k_mldev->cache_model_data);

	if (!ocm_alloc_mode_set) {
		cn10k_mldev->ocm.alloc_mode = CN10K_ML_OCM_ALLOC_MODE_DEFAULT;
	} else {
		if (!((strcmp(ocm_alloc_mode, "lowest") == 0) ||
		      (strcmp(ocm_alloc_mode, "largest") == 0))) {
			plt_err("Invalid argument, %s = %s", CN10K_ML_OCM_ALLOC_MODE,
				ocm_alloc_mode);
			ret = -EINVAL;
			goto exit;
		}
		cn10k_mldev->ocm.alloc_mode = ocm_alloc_mode;
	}
	plt_info("ML: %s = %s", CN10K_ML_OCM_ALLOC_MODE, cn10k_mldev->ocm.alloc_mode);

	if (!hw_queue_lock_set) {
		cn10k_mldev->hw_queue_lock = CN10K_ML_DEV_HW_QUEUE_LOCK_DEFAULT;
	} else {
		if ((cn10k_mldev->hw_queue_lock < 0) || (cn10k_mldev->hw_queue_lock > 1)) {
			plt_err("Invalid argument, %s = %d", CN10K_ML_DEV_HW_QUEUE_LOCK,
				cn10k_mldev->hw_queue_lock);
			ret = -EINVAL;
			goto exit;
		}
	}
	plt_info("ML: %s = %d", CN10K_ML_DEV_HW_QUEUE_LOCK, cn10k_mldev->hw_queue_lock);

	if (!ocm_page_size_set) {
		cn10k_mldev->ocm_page_size = CN10K_ML_OCM_PAGE_SIZE_DEFAULT;
	} else {
		if (cn10k_mldev->ocm_page_size < 0) {
			plt_err("Invalid argument, %s = %d", CN10K_ML_OCM_PAGE_SIZE,
				cn10k_mldev->ocm_page_size);
			ret = -EINVAL;
			goto exit;
		}

		found = false;
		for (i = 0; i < PLT_DIM(valid_ocm_page_size); i++) {
			if (cn10k_mldev->ocm_page_size == valid_ocm_page_size[i]) {
				found = true;
				break;
			}
		}

		if (!found) {
			plt_err("Unsupported ocm_page_size = %d", cn10k_mldev->ocm_page_size);
			ret = -EINVAL;
			goto exit;
		}
	}
	plt_info("ML: %s = %d", CN10K_ML_OCM_PAGE_SIZE, cn10k_mldev->ocm_page_size);

exit:
	rte_kvargs_free(kvlist);

	return ret;
}

static int
cn10k_ml_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
{
	struct rte_ml_dev_pmd_init_params init_params;
	struct cn10k_ml_dev *cn10k_mldev;
	struct cnxk_ml_dev *cnxk_mldev;
	char name[RTE_ML_STR_MAX];
	struct rte_ml_dev *dev;
	int ret;

	PLT_SET_USED(pci_drv);

	if (cnxk_ml_dev_initialized == 1) {
		plt_err("ML CNXK device already initialized!");
		plt_err("Cannot initialize CN10K PCI dev");
		return -EINVAL;
	}

	init_params = (struct rte_ml_dev_pmd_init_params){
		.socket_id = rte_socket_id(), .private_data_size = sizeof(struct cnxk_ml_dev)};

	ret = roc_plt_init();
	if (ret < 0) {
		plt_err("Failed to initialize platform model");
		return ret;
	}

	rte_pci_device_name(&pci_dev->addr, name, sizeof(name));
	dev = rte_ml_dev_pmd_create(name, &pci_dev->device, &init_params);
	if (dev == NULL) {
		ret = -ENODEV;
		goto error_exit;
	}

	/* Get private data space allocated */
	cnxk_mldev = dev->data->dev_private;
	cnxk_mldev->mldev = dev;
	cn10k_mldev = &cnxk_mldev->cn10k_mldev;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		cn10k_mldev->roc.pci_dev = pci_dev;

		ret = cn10k_mldev_parse_devargs(dev->device->devargs, cn10k_mldev);
		if (ret) {
			plt_err("Failed to parse devargs, ret = %d", ret);
			goto pmd_destroy;
		}

		ret = roc_ml_dev_init(&cn10k_mldev->roc);
		if (ret) {
			plt_err("Failed to initialize ML ROC, ret = %d", ret);
			goto pmd_destroy;
		}

		dev->dev_ops = &cnxk_ml_ops;
	} else {
		plt_err("CN10K ML Ops are not supported on secondary process");
		dev->dev_ops = &ml_dev_dummy_ops;
	}

	dev->enqueue_burst = NULL;
	dev->dequeue_burst = NULL;
	dev->op_error_get = NULL;

	cnxk_ml_dev_initialized = 1;
	cnxk_mldev->type = CNXK_ML_DEV_TYPE_PCI;
	cnxk_mldev->state = ML_CNXK_DEV_STATE_PROBED;

	return 0;

pmd_destroy:
	rte_ml_dev_pmd_destroy(dev);

error_exit:
	plt_err("Could not create device (vendor_id: 0x%x device_id: 0x%x)", pci_dev->id.vendor_id,
		pci_dev->id.device_id);

	return ret;
}

static int
cn10k_ml_pci_remove(struct rte_pci_device *pci_dev)
{
	struct cnxk_ml_dev *cnxk_mldev;
	char name[RTE_ML_STR_MAX];
	struct rte_ml_dev *dev;
	int ret;

	if (pci_dev == NULL)
		return -EINVAL;

	rte_pci_device_name(&pci_dev->addr, name, sizeof(name));

	dev = rte_ml_dev_pmd_get_named_dev(name);
	if (dev == NULL)
		return -ENODEV;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		cnxk_mldev = dev->data->dev_private;
		ret = roc_ml_dev_fini(&cnxk_mldev->cn10k_mldev.roc);
		if (ret)
			return ret;
	}

	return rte_ml_dev_pmd_destroy(dev);
}

static void
cn10k_ml_fw_print_info(struct cn10k_ml_fw *fw)
{
	plt_info("ML Firmware Version = %s", fw->req->cn10k_req.jd.fw_load.version);

	plt_ml_dbg("Firmware capabilities = 0x%016lx", fw->req->cn10k_req.jd.fw_load.cap.u64);
	plt_ml_dbg("Version = %s", fw->req->cn10k_req.jd.fw_load.version);
	plt_ml_dbg("core0_debug_ptr = 0x%016lx",
		   fw->req->cn10k_req.jd.fw_load.debug.core0_debug_ptr);
	plt_ml_dbg("core1_debug_ptr = 0x%016lx",
		   fw->req->cn10k_req.jd.fw_load.debug.core1_debug_ptr);
	plt_ml_dbg("debug_buffer_size = %u bytes",
		   fw->req->cn10k_req.jd.fw_load.debug.debug_buffer_size);
	plt_ml_dbg("core0_exception_buffer = 0x%016lx",
		   fw->req->cn10k_req.jd.fw_load.debug.core0_exception_buffer);
	plt_ml_dbg("core1_exception_buffer = 0x%016lx",
		   fw->req->cn10k_req.jd.fw_load.debug.core1_exception_buffer);
	plt_ml_dbg("exception_state_size = %u bytes",
		   fw->req->cn10k_req.jd.fw_load.debug.exception_state_size);
	plt_ml_dbg("flags = 0x%016lx", fw->req->cn10k_req.jd.fw_load.flags);
}

uint64_t
cn10k_ml_fw_flags_get(struct cn10k_ml_fw *fw)
{
	uint64_t flags = 0x0;

	if (fw->enable_dpe_warnings)
		flags = flags | FW_ENABLE_DPE_WARNING_BITMASK;

	if (fw->report_dpe_warnings)
		flags = flags | FW_REPORT_DPE_WARNING_BITMASK;

	flags = flags | FW_USE_DDR_POLL_ADDR_FP;

	return flags;
}
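
/* Worked example (derived only from the bitmasks defined above): with the
 * defaults enable_dpe_warnings=1 and report_dpe_warnings=0, the flags word is
 * FW_ENABLE_DPE_WARNING_BITMASK | FW_USE_DDR_POLL_ADDR_FP = BIT(0) | BIT(2) = 0x5.
 */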

static int
cn10k_ml_fw_load_asim(struct cn10k_ml_fw *fw)
{
	struct cn10k_ml_dev *cn10k_mldev;
	uint64_t timeout_cycle;
	uint64_t reg_val64;
	bool timeout;
	int ret = 0;

	cn10k_mldev = fw->cn10k_mldev;

	/* Reset HEAD and TAIL debug pointer registers */
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_HEAD_C0);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_TAIL_C0);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_HEAD_C1);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_TAIL_C1);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_EXCEPTION_SP_C0);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_EXCEPTION_SP_C1);

	/* Set ML_MLR_BASE to base IOVA of the ML region in LLC/DRAM. */
	reg_val64 = rte_eal_get_baseaddr();
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_MLR_BASE);
	plt_ml_dbg("ML_MLR_BASE = 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_MLR_BASE));
	roc_ml_reg_save(&cn10k_mldev->roc, ML_MLR_BASE);

	/* Update FW load completion structure */
	fw->req->cn10k_req.jd.hdr.jce.w1.u64 = PLT_U64_CAST(&fw->req->cn10k_req.status);
	fw->req->cn10k_req.jd.hdr.job_type = ML_CN10K_JOB_TYPE_FIRMWARE_LOAD;
	fw->req->cn10k_req.jd.hdr.result =
		roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &fw->req->cn10k_req.result);
	fw->req->cn10k_req.jd.fw_load.flags = cn10k_ml_fw_flags_get(fw);
	plt_write64(ML_CNXK_POLL_JOB_START, &fw->req->cn10k_req.status);
	plt_wmb();

	/* Enqueue FW load through scratch registers */
	timeout = true;
	timeout_cycle = plt_tsc_cycles() + ML_CNXK_CMD_TIMEOUT * plt_tsc_hz();
	roc_ml_scratch_enqueue(&cn10k_mldev->roc, &fw->req->cn10k_req.jd);

	plt_rmb();
	do {
		if (roc_ml_scratch_is_done_bit_set(&cn10k_mldev->roc) &&
		    (plt_read64(&fw->req->cn10k_req.status) == ML_CNXK_POLL_JOB_FINISH)) {
			timeout = false;
			break;
		}
	} while (plt_tsc_cycles() < timeout_cycle);

	/* Check firmware load status, clean-up and exit on failure. */
	if ((!timeout) && (fw->req->cn10k_req.result.error_code == 0)) {
		cn10k_ml_fw_print_info(fw);
	} else {
		/* Set ML to disable new jobs */
		reg_val64 = (ROC_ML_CFG_JD_SIZE | ROC_ML_CFG_MLIP_ENA);
		roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);

		/* Clear scratch registers */
		roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_WORK_PTR);
		roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_FW_CTRL);

		if (timeout) {
			plt_err("Firmware load timeout");
			ret = -ETIME;
		} else {
			plt_err("Firmware load failed");
			ret = -1;
		}

		return ret;
	}

	/* Reset scratch registers */
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_FW_CTRL);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_WORK_PTR);

	/* Disable job execution, to be enabled in start */
	reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG);
	reg_val64 &= ~ROC_ML_CFG_ENA;
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);
	plt_ml_dbg("ML_CFG => 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG));

	return ret;
}

static int
cn10k_ml_fw_load_cn10ka(struct cn10k_ml_fw *fw, void *buffer, uint64_t size)
{
	union ml_a35_0_rst_vector_base_s a35_0_rst_vector_base;
	union ml_a35_0_rst_vector_base_s a35_1_rst_vector_base;
	struct cn10k_ml_dev *cn10k_mldev;
	uint64_t timeout_cycle;
	uint64_t reg_val64;
	uint32_t reg_val32;
	uint64_t offset;
	bool timeout;
	int ret = 0;
	uint8_t i;

	cn10k_mldev = fw->cn10k_mldev;

	/* Reset HEAD and TAIL debug pointer registers */
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_HEAD_C0);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_TAIL_C0);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_HEAD_C1);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_DBG_BUFFER_TAIL_C1);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_EXCEPTION_SP_C0);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_EXCEPTION_SP_C1);

	/* (1) Write firmware images for ACC's two A35 cores to the ML region in LLC / DRAM. */
	rte_memcpy(PLT_PTR_ADD(fw->data, FW_LINKER_OFFSET), buffer, size);

	/* (2) Set ML(0)_MLR_BASE = Base IOVA of the ML region in LLC/DRAM. */
	reg_val64 = PLT_PTR_SUB_U64_CAST(fw->data, rte_eal_get_baseaddr());
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_MLR_BASE);
	plt_ml_dbg("ML_MLR_BASE => 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_MLR_BASE));
	roc_ml_reg_save(&cn10k_mldev->roc, ML_MLR_BASE);

	/* (3) Set ML(0)_AXI_BRIDGE_CTRL(1) = 0x184003 to remove back-pressure check on DMA AXI
	 * bridge.
	 */
	reg_val64 = (ROC_ML_AXI_BRIDGE_CTRL_AXI_RESP_CTRL |
		     ROC_ML_AXI_BRIDGE_CTRL_BRIDGE_CTRL_MODE | ROC_ML_AXI_BRIDGE_CTRL_NCB_WR_BLK |
		     ROC_ML_AXI_BRIDGE_CTRL_FORCE_WRESP_OK | ROC_ML_AXI_BRIDGE_CTRL_FORCE_RRESP_OK);
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_AXI_BRIDGE_CTRL(1));
	plt_ml_dbg("ML_AXI_BRIDGE_CTRL(1) => 0x%016lx",
		   roc_ml_reg_read64(&cn10k_mldev->roc, ML_AXI_BRIDGE_CTRL(1)));

	/* (4) Set ML(0)_ANB(0..2)_BACKP_DISABLE = 0x3 to remove back-pressure on the AXI to NCB
	 * bridges.
	 */
	for (i = 0; i < ML_ANBX_NR; i++) {
		reg_val64 = (ROC_ML_ANBX_BACKP_DISABLE_EXTMSTR_B_BACKP_DISABLE |
			     ROC_ML_ANBX_BACKP_DISABLE_EXTMSTR_R_BACKP_DISABLE);
		roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_ANBX_BACKP_DISABLE(i));
		plt_ml_dbg("ML_ANBX_BACKP_DISABLE(%u) => 0x%016lx", i,
			   roc_ml_reg_read64(&cn10k_mldev->roc, ML_ANBX_BACKP_DISABLE(i)));
	}

	/* (5) Set ML(0)_ANB(0..2)_NCBI_P_OVR = 0x3000 and ML(0)_ANB(0..2)_NCBI_NP_OVR = 0x3000 to
	 * signal all ML transactions as non-secure.
	 */
	for (i = 0; i < ML_ANBX_NR; i++) {
		reg_val64 = (ML_ANBX_NCBI_P_OVR_ANB_NCBI_P_NS_OVR |
			     ML_ANBX_NCBI_P_OVR_ANB_NCBI_P_NS_OVR_VLD);
		roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_ANBX_NCBI_P_OVR(i));
		plt_ml_dbg("ML_ANBX_NCBI_P_OVR(%u) => 0x%016lx", i,
			   roc_ml_reg_read64(&cn10k_mldev->roc, ML_ANBX_NCBI_P_OVR(i)));

		reg_val64 |= (ML_ANBX_NCBI_NP_OVR_ANB_NCBI_NP_NS_OVR |
			      ML_ANBX_NCBI_NP_OVR_ANB_NCBI_NP_NS_OVR_VLD);
		roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_ANBX_NCBI_NP_OVR(i));
		plt_ml_dbg("ML_ANBX_NCBI_NP_OVR(%u) => 0x%016lx", i,
			   roc_ml_reg_read64(&cn10k_mldev->roc, ML_ANBX_NCBI_NP_OVR(i)));
	}

	/* (6) Set ML(0)_CFG[MLIP_CLK_FORCE] = 1, to force turning on the MLIP clock. */
	reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG);
	reg_val64 |= ROC_ML_CFG_MLIP_CLK_FORCE;
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);
	plt_ml_dbg("ML_CFG => 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG));

	/* (7) Set ML(0)_JOB_MGR_CTRL[STALL_ON_IDLE] = 0, to make sure the boot request is accepted
	 * when there is no job in the command queue.
	 */
	reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_JOB_MGR_CTRL);
	reg_val64 &= ~ROC_ML_JOB_MGR_CTRL_STALL_ON_IDLE;
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_JOB_MGR_CTRL);
	plt_ml_dbg("ML_JOB_MGR_CTRL => 0x%016lx",
		   roc_ml_reg_read64(&cn10k_mldev->roc, ML_JOB_MGR_CTRL));

	/* (8) Set ML(0)_CFG[ENA] = 0 and ML(0)_CFG[MLIP_ENA] = 1 to bring MLIP out of reset while
	 * keeping the job manager disabled.
	 */
	reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG);
	reg_val64 |= ROC_ML_CFG_MLIP_ENA;
	reg_val64 &= ~ROC_ML_CFG_ENA;
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);
	plt_ml_dbg("ML_CFG => 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG));

	/* (9) Wait at least 70 coprocessor clock cycles. */
	plt_delay_us(FW_WAIT_CYCLES);

	/* (10) Write ML outbound addresses pointing to the firmware images written in step 1 to
	 * the following registers: ML(0)_A35_0_RST_VECTOR_BASE_W(0..1) for core 0,
	 * ML(0)_A35_1_RST_VECTOR_BASE_W(0..1) for core 1. The value written to each register is
	 * the AXI outbound address divided by 4. Read after write.
	 */
	offset = PLT_PTR_ADD_U64_CAST(
		fw->data, FW_LINKER_OFFSET - roc_ml_reg_read64(&cn10k_mldev->roc, ML_MLR_BASE));
	a35_0_rst_vector_base.s.addr = (offset + ML_AXI_START_ADDR) / 4;
	a35_1_rst_vector_base.s.addr = (offset + ML_AXI_START_ADDR) / 4;

	roc_ml_reg_write32(&cn10k_mldev->roc, a35_0_rst_vector_base.w.w0,
			   ML_A35_0_RST_VECTOR_BASE_W(0));
	reg_val32 = roc_ml_reg_read32(&cn10k_mldev->roc, ML_A35_0_RST_VECTOR_BASE_W(0));
	plt_ml_dbg("ML_A35_0_RST_VECTOR_BASE_W(0) => 0x%08x", reg_val32);

	roc_ml_reg_write32(&cn10k_mldev->roc, a35_0_rst_vector_base.w.w1,
			   ML_A35_0_RST_VECTOR_BASE_W(1));
	reg_val32 = roc_ml_reg_read32(&cn10k_mldev->roc, ML_A35_0_RST_VECTOR_BASE_W(1));
	plt_ml_dbg("ML_A35_0_RST_VECTOR_BASE_W(1) => 0x%08x", reg_val32);

	roc_ml_reg_write32(&cn10k_mldev->roc, a35_1_rst_vector_base.w.w0,
			   ML_A35_1_RST_VECTOR_BASE_W(0));
	reg_val32 = roc_ml_reg_read32(&cn10k_mldev->roc, ML_A35_1_RST_VECTOR_BASE_W(0));
	plt_ml_dbg("ML_A35_1_RST_VECTOR_BASE_W(0) => 0x%08x", reg_val32);

	roc_ml_reg_write32(&cn10k_mldev->roc, a35_1_rst_vector_base.w.w1,
			   ML_A35_1_RST_VECTOR_BASE_W(1));
	reg_val32 = roc_ml_reg_read32(&cn10k_mldev->roc, ML_A35_1_RST_VECTOR_BASE_W(1));
	plt_ml_dbg("ML_A35_1_RST_VECTOR_BASE_W(1) => 0x%08x", reg_val32);

	/* (11) Clear MLIP's ML(0)_SW_RST_CTRL[ACC_RST]. This will bring the ACC cores and other
	 * MLIP components out of reset. The cores will execute firmware from the ML region as
	 * written in step 1.
	 */
	reg_val32 = roc_ml_reg_read32(&cn10k_mldev->roc, ML_SW_RST_CTRL);
	reg_val32 &= ~ROC_ML_SW_RST_CTRL_ACC_RST;
	roc_ml_reg_write32(&cn10k_mldev->roc, reg_val32, ML_SW_RST_CTRL);
	reg_val32 = roc_ml_reg_read32(&cn10k_mldev->roc, ML_SW_RST_CTRL);
	plt_ml_dbg("ML_SW_RST_CTRL => 0x%08x", reg_val32);

	/* (12) Wait for notification from firmware that ML is ready for job execution. */
	fw->req->cn10k_req.jd.hdr.jce.w1.u64 = PLT_U64_CAST(&fw->req->cn10k_req.status);
	fw->req->cn10k_req.jd.hdr.job_type = ML_CN10K_JOB_TYPE_FIRMWARE_LOAD;
	fw->req->cn10k_req.jd.hdr.result =
		roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &fw->req->cn10k_req.result);
	fw->req->cn10k_req.jd.fw_load.flags = cn10k_ml_fw_flags_get(fw);
	plt_write64(ML_CNXK_POLL_JOB_START, &fw->req->cn10k_req.status);
	plt_wmb();

	/* Enqueue FW load through scratch registers */
	timeout = true;
	timeout_cycle = plt_tsc_cycles() + ML_CNXK_CMD_TIMEOUT * plt_tsc_hz();
	roc_ml_scratch_enqueue(&cn10k_mldev->roc, &fw->req->cn10k_req.jd);

	plt_rmb();
	do {
		if (roc_ml_scratch_is_done_bit_set(&cn10k_mldev->roc) &&
		    (plt_read64(&fw->req->cn10k_req.status) == ML_CNXK_POLL_JOB_FINISH)) {
			timeout = false;
			break;
		}
	} while (plt_tsc_cycles() < timeout_cycle);

	/* Check firmware load status, clean-up and exit on failure. */
	if ((!timeout) && (fw->req->cn10k_req.result.error_code == 0)) {
		cn10k_ml_fw_print_info(fw);
	} else {
		/* Set ML to disable new jobs */
		reg_val64 = (ROC_ML_CFG_JD_SIZE | ROC_ML_CFG_MLIP_ENA);
		roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);

		/* Clear scratch registers */
		roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_WORK_PTR);
		roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_FW_CTRL);

		if (timeout) {
			plt_err("Firmware load timeout");
			ret = -ETIME;
		} else {
			plt_err("Firmware load failed");
			ret = -1;
		}

		return ret;
	}

	/* (13) Set ML(0)_JOB_MGR_CTRL[STALL_ON_IDLE] = 0x1; this is needed to shut down the MLIP
	 * clock when there are no more jobs to process.
	 */
	reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_JOB_MGR_CTRL);
	reg_val64 |= ROC_ML_JOB_MGR_CTRL_STALL_ON_IDLE;
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_JOB_MGR_CTRL);
	plt_ml_dbg("ML_JOB_MGR_CTRL => 0x%016lx",
		   roc_ml_reg_read64(&cn10k_mldev->roc, ML_JOB_MGR_CTRL));

	/* (14) Set ML(0)_CFG[MLIP_CLK_FORCE] = 0; the MLIP clock will be turned on/off based on
	 * job activities.
	 */
	reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG);
	reg_val64 &= ~ROC_ML_CFG_MLIP_CLK_FORCE;
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);
	plt_ml_dbg("ML_CFG => 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG));

	/* (15) Set ML(0)_CFG[ENA] to enable ML job execution. */
	reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG);
	reg_val64 |= ROC_ML_CFG_ENA;
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);
	plt_ml_dbg("ML_CFG => 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG));

	/* Reset scratch registers */
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_FW_CTRL);
	roc_ml_reg_write64(&cn10k_mldev->roc, 0, ML_SCRATCH_WORK_PTR);

	/* Disable job execution, to be enabled in start */
	reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG);
	reg_val64 &= ~ROC_ML_CFG_ENA;
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_CFG);
	plt_ml_dbg("ML_CFG => 0x%016lx", roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG));

	/* Additional fixes: Set RO bit to fix O2D DMA bandwidth issue on cn10ka */
	for (i = 0; i < ML_ANBX_NR; i++) {
		reg_val64 = roc_ml_reg_read64(&cn10k_mldev->roc, ML_ANBX_NCBI_P_OVR(i));
		reg_val64 |= (ML_ANBX_NCBI_P_OVR_ANB_NCBI_P_RO_OVR |
			      ML_ANBX_NCBI_P_OVR_ANB_NCBI_P_RO_OVR_VLD);
		roc_ml_reg_write64(&cn10k_mldev->roc, reg_val64, ML_ANBX_NCBI_P_OVR(i));
	}

	return ret;
}

int
cn10k_ml_fw_load(struct cnxk_ml_dev *cnxk_mldev)
{
	struct cn10k_ml_dev *cn10k_mldev;
	const struct plt_memzone *mz;
	struct cn10k_ml_fw *fw;
	void *fw_buffer = NULL;
	uint64_t mz_size = 0;
	uint64_t fw_size = 0;
	int ret = 0;

	cn10k_mldev = &cnxk_mldev->cn10k_mldev;
	fw = &cn10k_mldev->fw;
	fw->cn10k_mldev = cn10k_mldev;

	if (roc_env_is_emulator() || roc_env_is_hw()) {
		/* Read firmware image to a buffer */
		ret = rte_firmware_read(fw->path, &fw_buffer, &fw_size);
		if ((ret < 0) || (fw_buffer == NULL)) {
			plt_err("Unable to read firmware data: %s", fw->path);
			return ret;
		}

		/* Reserve memzone for firmware load completion and data */
		mz_size = sizeof(struct cnxk_ml_req) + fw_size + FW_STACK_BUFFER_SIZE +
			  FW_DEBUG_BUFFER_SIZE + FW_EXCEPTION_BUFFER_SIZE;
	} else if (roc_env_is_asim()) {
		/* Reserve memzone for firmware load completion */
		mz_size = sizeof(struct cnxk_ml_req);
	}

	mz = plt_memzone_reserve_aligned(FW_MEMZONE_NAME, mz_size, 0, ML_CN10K_ALIGN_SIZE);
	if (mz == NULL) {
		plt_err("plt_memzone_reserve failed: %s", FW_MEMZONE_NAME);
		free(fw_buffer);
		return -ENOMEM;
	}
	fw->req = mz->addr;

	/* Reset firmware load completion structure */
	memset(&fw->req->cn10k_req.jd, 0, sizeof(struct cn10k_ml_jd));
	memset(&fw->req->cn10k_req.jd.fw_load.version[0], '\0', MLDEV_FIRMWARE_VERSION_LENGTH);

	/* Reset device, if in active state */
	if (roc_ml_mlip_is_enabled(&cn10k_mldev->roc))
		roc_ml_mlip_reset(&cn10k_mldev->roc, true);

	/* Load firmware */
	if (roc_env_is_emulator() || roc_env_is_hw()) {
		fw->data = PLT_PTR_ADD(mz->addr, sizeof(struct cnxk_ml_req));
		ret = cn10k_ml_fw_load_cn10ka(fw, fw_buffer, fw_size);
		free(fw_buffer);
	} else if (roc_env_is_asim()) {
		fw->data = NULL;
		ret = cn10k_ml_fw_load_asim(fw);
	}

	if (ret < 0)
		cn10k_ml_fw_unload(cnxk_mldev);

	return ret;
}
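
/* Size-budget note (derived from the constants at the top of this file):
 * cn10k_ml_fw_load() above reserves sizeof(struct cnxk_ml_req) + fw_size +
 * FW_STACK_BUFFER_SIZE + FW_DEBUG_BUFFER_SIZE + FW_EXCEPTION_BUFFER_SIZE, and
 * cn10k_ml_fw_load_cn10ka() copies the image at fw->data + FW_LINKER_OFFSET,
 * where FW_LINKER_OFFSET (0x80000) equals FW_STACK_BUFFER_SIZE +
 * FW_DEBUG_BUFFER_SIZE (0x40000 + 2 * 0x20000). cn10k_ml_fw_unload() below
 * releases the same memzone by name.
 */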

void
cn10k_ml_fw_unload(struct cnxk_ml_dev *cnxk_mldev)
{
	struct cn10k_ml_dev *cn10k_mldev;
	const struct plt_memzone *mz;
	uint64_t reg_val;

	cn10k_mldev = &cnxk_mldev->cn10k_mldev;

	/* Disable and reset device */
	reg_val = roc_ml_reg_read64(&cn10k_mldev->roc, ML_CFG);
	reg_val &= ~ROC_ML_CFG_MLIP_ENA;
	roc_ml_reg_write64(&cn10k_mldev->roc, reg_val, ML_CFG);
	roc_ml_mlip_reset(&cn10k_mldev->roc, true);

	mz = plt_memzone_lookup(FW_MEMZONE_NAME);
	if (mz != NULL)
		plt_memzone_free(mz);
}

static struct rte_pci_id pci_id_ml_table[] = {
	{RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_ML_PF)},
	/* sentinel */
	{},
};

static struct rte_pci_driver cn10k_mldev_pmd = {
	.id_table = pci_id_ml_table,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
	.probe = cn10k_ml_pci_probe,
	.remove = cn10k_ml_pci_remove,
};

RTE_PMD_REGISTER_PCI(MLDEV_NAME_CN10K_PMD, cn10k_mldev_pmd);
RTE_PMD_REGISTER_PCI_TABLE(MLDEV_NAME_CN10K_PMD, pci_id_ml_table);
RTE_PMD_REGISTER_KMOD_DEP(MLDEV_NAME_CN10K_PMD, "vfio-pci");

RTE_PMD_REGISTER_PARAM_STRING(MLDEV_NAME_CN10K_PMD, CN10K_ML_FW_PATH
			      "=<path>" CN10K_ML_FW_ENABLE_DPE_WARNINGS
			      "=<0|1>" CN10K_ML_FW_REPORT_DPE_WARNINGS
			      "=<0|1>" CN10K_ML_DEV_CACHE_MODEL_DATA
			      "=<0|1>" CN10K_ML_OCM_ALLOC_MODE
			      "=<lowest|largest>" CN10K_ML_DEV_HW_QUEUE_LOCK
			      "=<0|1>" CN10K_ML_OCM_PAGE_SIZE "=<1024|2048|4096|8192|16384>");
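
/* Illustrative EAL usage (the PCI address and values below are placeholders):
 * the devargs registered above are passed per device on the allow list, e.g.
 *
 *   dpdk-test-mldev -a 0000:3e:00.0,fw_path=/lib/firmware/mlip-fw.bin,hw_queue_lock=1
 */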