/*	$NetBSD: amdgpu_uvd.c,v 1.7 2020/02/14 14:34:58 maya Exp $	*/

/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_uvd.c,v 1.7 2020/02/14 14:34:58 maya Exp $");

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_uvd.h"
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"

#include <linux/nbsd-namespace.h>

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin"
#define FIRMWARE_KABINI		"radeon/kabini_uvd.bin"
#define FIRMWARE_KAVERI		"radeon/kaveri_uvd.bin"
#define FIRMWARE_HAWAII		"radeon/hawaii_uvd.bin"
#define FIRMWARE_MULLINS	"radeon/mullins_uvd.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_uvd.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_uvd.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_uvd.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_uvd.bin"

/**
 * amdgpu_uvd_cs_ctx - Command submission parser context
 *
 * Used for emulating virtual memory support on UVD 4.2.
 */
struct amdgpu_uvd_cs_ctx {
	struct amdgpu_cs_parser *parser;
	unsigned reg, count;
	unsigned data0, data1;
	unsigned idx;
	unsigned ib_idx;

	/* does the IB have a msg command */
	bool has_msg_cmd;

	/* minimum buffer sizes */
	unsigned *buf_sizes;
};

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);

static void amdgpu_uvd_note_usage(struct amdgpu_device *adev);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);

/**
 * amdgpu_uvd_sw_init - load firmware and allocate the UVD VCPU buffer
 *
 * @adev: amdgpu_device pointer
 *
 * Request and validate the UVD firmware for the current ASIC, then
 * allocate, pin and map the VCPU buffer object that holds the firmware
 * image, stack and heap.
 */
int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned version_major, version_minor, family_id;
	int i, r;

	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->uvd.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->uvd.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->uvd.fw);
		adev->uvd.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
	DRM_INFO("Found UVD firmware Version: %x.%x Family ID: %x\n",
		 version_major, version_minor, family_id);

	adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
				(family_id << 8));

	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
		  + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE;
	r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &adev->uvd.vcpu_bo);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
	if (r) {
		amdgpu_bo_unref(&adev->uvd.vcpu_bo);
		dev_err(adev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = amdgpu_bo_pin(adev->uvd.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
			  &adev->uvd.gpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
		amdgpu_bo_unref(&adev->uvd.vcpu_bo);
		dev_err(adev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(adev->uvd.vcpu_bo, &adev->uvd.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	amdgpu_bo_unreserve(adev->uvd.vcpu_bo);

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
		atomic_set(&adev->uvd.handles[i], 0);
		adev->uvd.filp[i] = NULL;
	}

	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
	if (!amdgpu_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
		adev->uvd.address_64_bit = true;

	return 0;
}

/**
 * amdgpu_uvd_sw_fini - free the UVD VCPU buffer and firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Unpin and free the VCPU buffer object, tear down the ring and
 * release the firmware.
 */
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->uvd.vcpu_bo == NULL)
		return 0;

	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
	if (!r) {
		amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
		amdgpu_bo_unpin(adev->uvd.vcpu_bo);
		amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
	}

	amdgpu_bo_unref(&adev->uvd.vcpu_bo);

	amdgpu_ring_fini(&adev->uvd.ring);

	release_firmware(adev->uvd.fw);

	return 0;
}

/**
 * amdgpu_uvd_suspend - close all open UVD sessions
 *
 * @adev: amdgpu_device pointer
 *
 * Send a destroy message for every session that is still open so the
 * block can be safely powered down.
 */
int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->uvd.ring;
	int i, r;

	if (adev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
		if (handle != 0) {
			struct fence *fence;

			amdgpu_uvd_note_usage(adev);

			r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			fence_wait(fence, false);
			fence_put(fence);

			adev->uvd.filp[i] = NULL;
			atomic_set(&adev->uvd.handles[i], 0);
		}
	}

	return 0;
}

/**
 * amdgpu_uvd_resume - reload the UVD firmware image
 *
 * @adev: amdgpu_device pointer
 *
 * Copy the firmware back into the VCPU buffer and clear the stack and
 * heap area behind it.
 */
int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	const struct common_firmware_header *hdr;
	unsigned offset;

	if (adev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
	memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
	       (adev->uvd.fw->size) - offset);

	cancel_delayed_work_sync(&adev->uvd.idle_work);

	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
	size -= le32_to_cpu(hdr->ucode_size_bytes);
	ptr = adev->uvd.cpu_addr;
	ptr += le32_to_cpu(hdr->ucode_size_bytes);

	memset(ptr, 0, size);

	return 0;
}

/**
 * amdgpu_uvd_free_handles - close all sessions owned by a DRM file
 *
 * @adev: amdgpu_device pointer
 * @filp: DRM file pointer
 *
 * Send a destroy message for every session that was opened by @filp,
 * typically when the file is closed.
 */
void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->uvd.ring;
	int i, r;

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
		if (handle != 0 && adev->uvd.filp[i] == filp) {
			struct fence *fence;

			amdgpu_uvd_note_usage(adev);

			r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			fence_wait(fence, false);
			fence_put(fence);

			adev->uvd.filp[i] = NULL;
			atomic_set(&adev->uvd.handles[i], 0);
		}
	}
}

/* restrict all placements to the first 256MB so the buffer stays inside
 * UVD's address segment */
static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *rbo)
{
	int i;
	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}
}

/**
 * amdgpu_uvd_cs_pass1 - first parsing round
 *
 * @ctx: UVD parser context
 *
 * Make sure UVD message and feedback buffers are in VRAM and
 * nobody is violating a 256MB boundary.
 */
static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint32_t cmd, lo, hi;
	uint64_t addr;
	int r = 0;

	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);

	mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo);
	if (mapping == NULL) {
		DRM_ERROR("Can't find BO for addr 0x%08"PRIx64"\n", addr);
		return -EINVAL;
	}

	if (!ctx->parser->adev->uvd.address_64_bit) {
		/* check if it's a message or feedback command */
		cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
		if (cmd == 0x0 || cmd == 0x3) {
			/* yes, force it into VRAM */
			uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
			amdgpu_ttm_placement_from_domain(bo, domain);
		}
		amdgpu_uvd_force_into_uvd_segment(bo);

		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	}

	return r;
}

/**
 * amdgpu_uvd_cs_msg_decode - handle UVD decode message
 *
 * @msg: pointer to message structure
 * @buf_sizes: returned buffer sizes
 *
 * Peek into the decode message and calculate the necessary buffer sizes.
 */
static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];
	unsigned level = msg[57];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);
	unsigned fs_in_mb = width_in_mb * height_in_mb;

	unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
	unsigned min_ctx_size = 0;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */
	case 7: /* H264 Perf */
		switch(level) {
		case 30:
			num_dpb_buffer = 8100 / fs_in_mb;
			break;
		case 31:
			num_dpb_buffer = 18000 / fs_in_mb;
			break;
		case 32:
			num_dpb_buffer = 20480 / fs_in_mb;
			break;
		case 41:
			num_dpb_buffer = 32768 / fs_in_mb;
			break;
		case 42:
			num_dpb_buffer = 34816 / fs_in_mb;
			break;
		case 50:
			num_dpb_buffer = 110400 / fs_in_mb;
			break;
		case 51:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		default:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		}
		num_dpb_buffer++;
		if (num_dpb_buffer > 17)
			num_dpb_buffer = 17;

		/* reference picture buffer */
		min_dpb_size = image_size * num_dpb_buffer;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	case 16: /* H265 */
		image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
		image_size = ALIGN(image_size, 256);

		num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2;
		min_dpb_size = image_size * num_dpb_buffer;
		min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16)
			       * 16 * num_dpb_buffer + 52 * 1024;
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	buf_sizes[0x4] = min_ctx_size;
	return 0;
}

/**
 * amdgpu_uvd_cs_msg - handle UVD message
 *
 * @ctx: UVD parser context
 * @bo: buffer object containing the message
 * @offset: offset into the buffer object
 *
 * Peek into the UVD message and extract the session id.
 * Make sure that we don't open up too many sessions.
 */
static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
			     struct amdgpu_bo *bo, unsigned offset)
{
	struct amdgpu_device *adev = ctx->parser->adev;
	int32_t *msg, msg_type, handle;
	void *ptr;
	long r;
	int i;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
						MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		amdgpu_bo_kunmap(bo);

		/* try to alloc a new handle */
		for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
				adev->uvd.filp[i] = ctx->parser->filp;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, calc buffer sizes */
		r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes);
		amdgpu_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
				if (adev->uvd.filp[i] != ctx->parser->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
			atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
		amdgpu_bo_kunmap(bo);
		return 0;

	default:
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
		return -EINVAL;
	}
	BUG();
	return -EINVAL;
}

/**
 * amdgpu_uvd_cs_pass2 - second parsing round
 *
 * @ctx: UVD parser context
 *
 * Patch buffer addresses, make sure buffer sizes are correct.
 */
static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	struct amdgpu_ib *ib;
	uint32_t cmd, lo, hi;
	uint64_t start, end;
	uint64_t addr;
	int r;

	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);

	mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo);
	if (mapping == NULL)
		return -EINVAL;

	start = amdgpu_bo_gpu_offset(bo);

	end = (mapping->it.last + 1 - mapping->it.start);
	end = end * AMDGPU_GPU_PAGE_SIZE + start;

	addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
	start += addr;

	ib = &ctx->parser->ibs[ctx->ib_idx];
	ib->ptr[ctx->data0] = start & 0xFFFFFFFF;
	ib->ptr[ctx->data1] = start >> 32;

	cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
	if (cmd < 0x4) {
		if ((end - start) < ctx->buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start),
				  ctx->buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd == 0x206) {
		if ((end - start) < ctx->buf_sizes[4]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start),
				  ctx->buf_sizes[4]);
			return -EINVAL;
		}
	} else if ((cmd != 0x100) && (cmd != 0x204)) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if (!ctx->parser->adev->uvd.address_64_bit) {
		if ((start >> 28) != ((end - 1) >> 28)) {
			DRM_ERROR("reloc %"PRIX64"-%"PRIX64" crossing 256MB boundary!\n",
				  start, end);
			return -EINVAL;
		}

		if ((cmd == 0 || cmd == 0x3) &&
		    (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) {
			DRM_ERROR("msg/fb buffer %"PRIX64"-%"PRIX64" out of 256MB segment!\n",
				  start, end);
			return -EINVAL;
		}
	}

	if (cmd == 0) {
		ctx->has_msg_cmd = true;
		r = amdgpu_uvd_cs_msg(ctx, bo, addr);
		if (r)
			return r;
	} else if (!ctx->has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

/**
 * amdgpu_uvd_cs_reg - parse register writes
 *
 * @ctx: UVD parser context
 * @cb: callback function
 *
 * Parse the register writes, call cb on each complete command.
 */
static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
			     int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
	struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
	int i, r;

	ctx->idx++;
	for (i = 0; i <= ctx->count; ++i) {
		unsigned reg = ctx->reg + i;

		if (ctx->idx >= ib->length_dw) {
			DRM_ERROR("Register command after end of CS!\n");
			return -EINVAL;
		}

		switch (reg) {
		case mmUVD_GPCOM_VCPU_DATA0:
			ctx->data0 = ctx->idx;
			break;
		case mmUVD_GPCOM_VCPU_DATA1:
			ctx->data1 = ctx->idx;
			break;
		case mmUVD_GPCOM_VCPU_CMD:
			r = cb(ctx);
			if (r)
				return r;
			break;
		case mmUVD_ENGINE_CNTL:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n", reg);
			return -EINVAL;
		}
		ctx->idx++;
	}
	return 0;
}

/**
 * amdgpu_uvd_cs_packets - parse UVD packets
 *
 * @ctx: UVD parser context
 * @cb: callback function
 *
 * Parse the command stream packets.
 */
static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
				 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
	struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
	int r;

	for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
		uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
		unsigned type = CP_PACKET_GET_TYPE(cmd);
		switch (type) {
		case PACKET_TYPE0:
			ctx->reg = CP_PACKET0_GET_REG(cmd);
			ctx->count = CP_PACKET_GET_COUNT(cmd);
			r = amdgpu_uvd_cs_reg(ctx, cb);
			if (r)
				return r;
			break;
		case PACKET_TYPE2:
			++ctx->idx;
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", type);
			return -EINVAL;
		}
	}
	return 0;
}

/**
 * amdgpu_uvd_ring_parse_cs - UVD command submission parser
 *
 * @parser: Command submission parser context
 * @ib_idx: which IB to process
 *
 * Parse the command stream, patch in addresses as necessary.
 */
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
{
	struct amdgpu_uvd_cs_ctx ctx = {};
	unsigned buf_sizes[] = {
		[0x00000000]	=	2048,
		[0x00000001]	=	0xFFFFFFFF,
		[0x00000002]	=	0xFFFFFFFF,
		[0x00000003]	=	2048,
		[0x00000004]	=	0xFFFFFFFF,
	};
	struct amdgpu_ib *ib = &parser->ibs[ib_idx];
	int r;

	if (ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  ib->length_dw);
		return -EINVAL;
	}

	ctx.parser = parser;
	ctx.buf_sizes = buf_sizes;
	ctx.ib_idx = ib_idx;

	/* first round, make sure the buffers are actually in the UVD segment */
	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
	if (r)
		return r;

	/* second round, patch buffer addresses into the command stream */
	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
	if (r)
		return r;

	if (!ctx.has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	amdgpu_uvd_note_usage(ctx.parser->adev);

	return 0;
}

static int amdgpu_uvd_free_job(
	struct amdgpu_job *job)
{
	amdgpu_ib_free(job->adev, job->ibs);
	kfree(job->ibs);
	return 0;
}

static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
			       struct amdgpu_bo *bo,
			       struct fence **fence)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct amdgpu_ib *ib = NULL;
	struct fence *f = NULL;
	struct amdgpu_device *adev = ring->adev;
	uint64_t addr;
	int i, r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
	if (r)
		return r;

	if (!bo->adev->uvd.address_64_bit) {
		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
		amdgpu_uvd_force_into_uvd_segment(bo);
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto err;
	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!ib) {
		r = -ENOMEM;
		goto err;
	}
	r = amdgpu_ib_get(ring, NULL, 64, ib);
	if (r)
		goto err1;

	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; ++i)
		ib->ptr[i] = PACKET2(0);
	ib->length_dw = 16;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
						 &amdgpu_uvd_free_job,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r)
		goto err2;

	ttm_eu_fence_buffer_objects(&ticket, &head, f);

	if (fence)
		*fence = fence_get(f);
	amdgpu_bo_unref(&bo);
	fence_put(f);
	if (amdgpu_enable_scheduler)
		return 0;

	amdgpu_ib_free(ring->adev, ib);
	kfree(ib);
	return 0;
err2:
	amdgpu_ib_free(ring->adev, ib);
err1:
	kfree(ib);
err:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}

/* multiple fence commands without any stream commands in between can
   crash the vcpu so just try to emit a dummy create/destroy msg to
   avoid this */
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
			      struct fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &bo);
	if (r)
		return r;

	r = amdgpu_bo_reserve(bo, false);
	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&msg);
	if (r) {
		amdgpu_bo_unreserve(bo);
		amdgpu_bo_unref(&bo);
		return r;
	}

	/* stitch together a UVD create msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000000);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(0x00000000);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000000);
	msg[7] = cpu_to_le32(0x00000780);
	msg[8] = cpu_to_le32(0x00000440);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x01b37000);
	for (i = 11; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);

	return amdgpu_uvd_send_msg(ring, bo, fence);
}

int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
			       struct fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &bo);
	if (r)
		return r;

	r = amdgpu_bo_reserve(bo, false);
	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&msg);
	if (r) {
		amdgpu_bo_unreserve(bo);
		amdgpu_bo_unref(&bo);
		return r;
	}

	/* stitch together a UVD destroy msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000002);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	for (i = 4; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);

	return amdgpu_uvd_send_msg(ring, bo, fence);
}

static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, uvd.idle_work.work);
	unsigned i, fences, handles = 0;

	fences = amdgpu_fence_count_emitted(&adev->uvd.ring);

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
		if (atomic_read(&adev->uvd.handles[i]))
			++handles;

	if (fences == 0 && handles == 0) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_uvd(adev, false);
		} else {
			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
		}
	} else {
		schedule_delayed_work(&adev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

static void amdgpu_uvd_note_usage(struct amdgpu_device *adev)
{
	bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&adev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_uvd(adev, true);
		} else {
			amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
		}
	}
}